Fix subgroups tests using basePipelineHandle with index != -1
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / subgroups / vktSubgroupsTestsUtils.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Google Inc.
7  * Copyright (c) 2017 Codeplay Software Ltd.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests Utils
24  */ /*--------------------------------------------------------------------*/
25
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "deFloat16.h"
28 #include "deRandom.hpp"
29 #include "tcuCommandLine.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "vkBarrierUtil.hpp"
32 #include "vkImageUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkObjUtil.hpp"
36 using namespace tcu;
37 using namespace std;
38 using namespace vk;
39 using namespace vkt;
40
41 namespace
42 {
43
44 deUint32 getMaxWidth ()
45 {
46         return 1024u;
47 }
48
49 deUint32 getNextWidth (const deUint32 width)
50 {
51         if (width < 128)
52         {
53                 // This ensures we test every value up to 128 (the max subgroup size).
54                 return width + 1;
55         }
56         else
57         {
58                 // And once we hit 128 we increment to only power of 2's to reduce testing time.
59                 return width * 2;
60         }
61 }
62
63 deUint32 getFormatSizeInBytes(const VkFormat format)
64 {
65         switch (format)
66         {
67                 default:
68                         DE_FATAL("Unhandled format!");
69                         return 0;
70                 case VK_FORMAT_R8_SINT:
71                 case VK_FORMAT_R8_UINT:
72                         return static_cast<deUint32>(sizeof(deInt8));
73                 case VK_FORMAT_R8G8_SINT:
74                 case VK_FORMAT_R8G8_UINT:
75                         return static_cast<deUint32>(sizeof(deInt8) * 2);
76                 case VK_FORMAT_R8G8B8_SINT:
77                 case VK_FORMAT_R8G8B8_UINT:
78                 case VK_FORMAT_R8G8B8A8_SINT:
79                 case VK_FORMAT_R8G8B8A8_UINT:
80                         return static_cast<deUint32>(sizeof(deInt8) * 4);
81                 case VK_FORMAT_R16_SINT:
82                 case VK_FORMAT_R16_UINT:
83                 case VK_FORMAT_R16_SFLOAT:
84                         return static_cast<deUint32>(sizeof(deInt16));
85                 case VK_FORMAT_R16G16_SINT:
86                 case VK_FORMAT_R16G16_UINT:
87                 case VK_FORMAT_R16G16_SFLOAT:
88                         return static_cast<deUint32>(sizeof(deInt16) * 2);
89                 case VK_FORMAT_R16G16B16_UINT:
90                 case VK_FORMAT_R16G16B16_SINT:
91                 case VK_FORMAT_R16G16B16_SFLOAT:
92                 case VK_FORMAT_R16G16B16A16_SINT:
93                 case VK_FORMAT_R16G16B16A16_UINT:
94                 case VK_FORMAT_R16G16B16A16_SFLOAT:
95                         return static_cast<deUint32>(sizeof(deInt16) * 4);
96                 case VK_FORMAT_R32_SINT:
97                 case VK_FORMAT_R32_UINT:
98                 case VK_FORMAT_R32_SFLOAT:
99                         return static_cast<deUint32>(sizeof(deInt32));
100                 case VK_FORMAT_R32G32_SINT:
101                 case VK_FORMAT_R32G32_UINT:
102                 case VK_FORMAT_R32G32_SFLOAT:
103                         return static_cast<deUint32>(sizeof(deInt32) * 2);
104                 case VK_FORMAT_R32G32B32_SINT:
105                 case VK_FORMAT_R32G32B32_UINT:
106                 case VK_FORMAT_R32G32B32_SFLOAT:
107                 case VK_FORMAT_R32G32B32A32_SINT:
108                 case VK_FORMAT_R32G32B32A32_UINT:
109                 case VK_FORMAT_R32G32B32A32_SFLOAT:
110                         return static_cast<deUint32>(sizeof(deInt32) * 4);
111                 case VK_FORMAT_R64_SINT:
112                 case VK_FORMAT_R64_UINT:
113                 case VK_FORMAT_R64_SFLOAT:
114                         return static_cast<deUint32>(sizeof(deInt64));
115                 case VK_FORMAT_R64G64_SINT:
116                 case VK_FORMAT_R64G64_UINT:
117                 case VK_FORMAT_R64G64_SFLOAT:
118                         return static_cast<deUint32>(sizeof(deInt64) * 2);
119                 case VK_FORMAT_R64G64B64_SINT:
120                 case VK_FORMAT_R64G64B64_UINT:
121                 case VK_FORMAT_R64G64B64_SFLOAT:
122                 case VK_FORMAT_R64G64B64A64_SINT:
123                 case VK_FORMAT_R64G64B64A64_UINT:
124                 case VK_FORMAT_R64G64B64A64_SFLOAT:
125                         return static_cast<deUint32>(sizeof(deInt64) * 4);
126                 // The below formats are used to represent bool and bvec* types. These
127                 // types are passed to the shader as int and ivec* types, before the
128                 // calculations are done as booleans. We need a distinct type here so
129                 // that the shader generators can switch on it and generate the correct
130                 // shader source for testing.
131                 case VK_FORMAT_R8_USCALED:
132                         return static_cast<deUint32>(sizeof(deInt32));
133                 case VK_FORMAT_R8G8_USCALED:
134                         return static_cast<deUint32>(sizeof(deInt32) * 2);
135                 case VK_FORMAT_R8G8B8_USCALED:
136                 case VK_FORMAT_R8G8B8A8_USCALED:
137                         return static_cast<deUint32>(sizeof(deInt32) * 4);
138         }
139 }
140
141 deUint32 getElementSizeInBytes(
142         const VkFormat format,
143         const subgroups::SSBOData::InputDataLayoutType layout)
144 {
145         deUint32 bytes = getFormatSizeInBytes(format);
146         if (layout == subgroups::SSBOData::LayoutStd140)
147                 return bytes < 16 ? 16 : bytes;
148         else
149                 return bytes;
150 }
151
152 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
153 {
154         VkAttachmentReference colorReference = {
155                 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
156         };
157
158         const VkSubpassDescription subpassDescription = {0u,
159                                                                                                          VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
160                                                                                                          DE_NULL, DE_NULL, 0, DE_NULL
161                                                                                                         };
162
163         const VkSubpassDependency subpassDependencies[2] = {
164                 {   VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
165                         VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
166                         VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
167                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
168                         VK_DEPENDENCY_BY_REGION_BIT
169                 },
170                 {   0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
171                         VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
172                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
173                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
174                         VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
175                 },
176         };
177
178         VkAttachmentDescription attachmentDescription = {0u, format,
179                                                                                                          VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
180                                                                                                          VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
181                                                                                                          VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
182                                                                                                          VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
183                                                                                                         };
184
185         const VkRenderPassCreateInfo renderPassCreateInfo = {
186                 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
187                 &attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
188         };
189
190         return createRenderPass(context.getDeviceInterface(), context.getDevice(),
191                                                         &renderPassCreateInfo);
192 }
193
194 Move<VkPipeline> makeGraphicsPipeline(const DeviceInterface&                                            vk,
195                                                                           const VkDevice                                                                device,
196                                                                           const VkPipelineLayout                                                pipelineLayout,
197                                                                           const VkShaderModule                                                  vertexShaderModule,
198                                                                           const VkShaderModule                                                  tessellationControlShaderModule,
199                                                                           const VkShaderModule                                                  tessellationEvalShaderModule,
200                                                                           const VkShaderModule                                                  geometryShaderModule,
201                                                                           const VkShaderModule                                                  fragmentShaderModule,
202                                                                           const VkRenderPass                                                    renderPass,
203                                                                           const std::vector<VkViewport>&                                viewports,
204                                                                           const std::vector<VkRect2D>&                                  scissors,
205                                                                           const VkPrimitiveTopology                                             topology,
206                                                                           const deUint32                                                                subpass,
207                                                                           const deUint32                                                                patchControlPoints,
208                                                                           const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo,
209                                                                           const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo,
210                                                                           const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo,
211                                                                           const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo,
212                                                                           const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo,
213                                                                           const VkPipelineDynamicStateCreateInfo*               dynamicStateCreateInfo,
214                                                                           const deUint32                                                                vertexShaderStageCreateFlags,
215                                                                           const deUint32                                                                tessellationControlShaderStageCreateFlags,
216                                                                           const deUint32                                                                tessellationEvalShaderStageCreateFlags,
217                                                                           const deUint32                                                                geometryShaderStageCreateFlags,
218                                                                           const deUint32                                                                fragmentShaderStageCreateFlags,
219                                                                           const deUint32                                                                requiredSubgroupSize[5])
220 {
221         const VkBool32                                                                  disableRasterization                            = (fragmentShaderModule == DE_NULL);
222         const bool                                                                              hasTessellation                                         = (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
223
224         VkPipelineShaderStageCreateInfo                                 stageCreateInfo                                         =
225         {
226                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    // VkStructureType                     sType
227                 DE_NULL,                                                                                                // const void*                         pNext
228                 0u,                                                                                                             // VkPipelineShaderStageCreateFlags    flags
229                 VK_SHADER_STAGE_VERTEX_BIT,                                                             // VkShaderStageFlagBits               stage
230                 DE_NULL,                                                                                                // VkShaderModule                      module
231                 "main",                                                                                                 // const char*                         pName
232                 DE_NULL                                                                                                 // const VkSpecializationInfo*         pSpecializationInfo
233         };
234
235         std::vector<VkPipelineShaderStageCreateInfo>    pipelineShaderStageParams;
236
237         const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
238                 {
239                         {
240                                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
241                                 DE_NULL,
242                                 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
243                         },
244                         {
245                                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
246                                 DE_NULL,
247                                 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
248                         },
249                         {
250                                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
251                                 DE_NULL,
252                                 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
253                         },
254                         {
255                                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
256                                 DE_NULL,
257                                 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
258                         },
259                         {
260                                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
261                                 DE_NULL,
262                                 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
263                         },
264                 };
265         {
266                 stageCreateInfo.pNext   = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
267                 stageCreateInfo.flags   = vertexShaderStageCreateFlags;
268                 stageCreateInfo.stage   = VK_SHADER_STAGE_VERTEX_BIT;
269                 stageCreateInfo.module  = vertexShaderModule;
270                 pipelineShaderStageParams.push_back(stageCreateInfo);
271         }
272
273         if (tessellationControlShaderModule != DE_NULL)
274         {
275                 stageCreateInfo.pNext   = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
276                 stageCreateInfo.flags   = tessellationControlShaderStageCreateFlags;
277                 stageCreateInfo.stage   = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
278                 stageCreateInfo.module  = tessellationControlShaderModule;
279                 pipelineShaderStageParams.push_back(stageCreateInfo);
280         }
281
282         if (tessellationEvalShaderModule != DE_NULL)
283         {
284                 stageCreateInfo.pNext   = (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
285                 stageCreateInfo.flags   = tessellationEvalShaderStageCreateFlags;
286                 stageCreateInfo.stage   = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
287                 stageCreateInfo.module  = tessellationEvalShaderModule;
288                 pipelineShaderStageParams.push_back(stageCreateInfo);
289         }
290
291         if (geometryShaderModule != DE_NULL)
292         {
293                 stageCreateInfo.pNext   = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
294                 stageCreateInfo.flags   = geometryShaderStageCreateFlags;
295                 stageCreateInfo.stage   = VK_SHADER_STAGE_GEOMETRY_BIT;
296                 stageCreateInfo.module  = geometryShaderModule;
297                 pipelineShaderStageParams.push_back(stageCreateInfo);
298         }
299
300         if (fragmentShaderModule != DE_NULL)
301         {
302                 stageCreateInfo.pNext   = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
303                 stageCreateInfo.flags   = fragmentShaderStageCreateFlags;
304                 stageCreateInfo.stage   = VK_SHADER_STAGE_FRAGMENT_BIT;
305                 stageCreateInfo.module  = fragmentShaderModule;
306                 pipelineShaderStageParams.push_back(stageCreateInfo);
307         }
308
309         const VkVertexInputBindingDescription                   vertexInputBindingDescription           =
310         {
311                 0u,                                                             // deUint32             binding
312                 sizeof(tcu::Vec4),                              // deUint32             stride
313                 VK_VERTEX_INPUT_RATE_VERTEX,    // VkVertexInputRate    inputRate
314         };
315
316         const VkVertexInputAttributeDescription                 vertexInputAttributeDescription         =
317         {
318                 0u,                                                             // deUint32    location
319                 0u,                                                             // deUint32    binding
320                 VK_FORMAT_R32G32B32A32_SFLOAT,  // VkFormat    format
321                 0u                                                              // deUint32    offset
322         };
323
324         const VkPipelineVertexInputStateCreateInfo              vertexInputStateCreateInfoDefault       =
325         {
326                 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,      // VkStructureType                             sType
327                 DE_NULL,                                                                                                        // const void*                                 pNext
328                 (VkPipelineVertexInputStateCreateFlags)0,                                       // VkPipelineVertexInputStateCreateFlags       flags
329                 1u,                                                                                                                     // deUint32                                    vertexBindingDescriptionCount
330                 &vertexInputBindingDescription,                                                         // const VkVertexInputBindingDescription*      pVertexBindingDescriptions
331                 1u,                                                                                                                     // deUint32                                    vertexAttributeDescriptionCount
332                 &vertexInputAttributeDescription                                                        // const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions
333         };
334
335         const VkPipelineInputAssemblyStateCreateInfo    inputAssemblyStateCreateInfo            =
336         {
337                 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,    // VkStructureType                            sType
338                 DE_NULL,                                                                                                                // const void*                                pNext
339                 0u,                                                                                                                             // VkPipelineInputAssemblyStateCreateFlags    flags
340                 topology,                                                                                                               // VkPrimitiveTopology                        topology
341                 VK_FALSE                                                                                                                // VkBool32                                   primitiveRestartEnable
342         };
343
344         const VkPipelineTessellationStateCreateInfo             tessStateCreateInfo                                     =
345         {
346                 VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,      // VkStructureType                           sType
347                 DE_NULL,                                                                                                        // const void*                               pNext
348                 0u,                                                                                                                     // VkPipelineTessellationStateCreateFlags    flags
349                 patchControlPoints                                                                                      // deUint32                                  patchControlPoints
350         };
351
352         const VkPipelineViewportStateCreateInfo                 viewportStateCreateInfo                         =
353         {
354                 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,  // VkStructureType                             sType
355                 DE_NULL,                                                                                                // const void*                                 pNext
356                 (VkPipelineViewportStateCreateFlags)0,                                  // VkPipelineViewportStateCreateFlags          flags
357                 viewports.empty() ? 1u : (deUint32)viewports.size(),    // deUint32                                    viewportCount
358                 viewports.empty() ? DE_NULL : &viewports[0],                    // const VkViewport*                           pViewports
359                 viewports.empty() ? 1u : (deUint32)scissors.size(),             // deUint32                                    scissorCount
360                 scissors.empty() ? DE_NULL : &scissors[0]                               // const VkRect2D*                             pScissors
361         };
362
363         const VkPipelineRasterizationStateCreateInfo    rasterizationStateCreateInfoDefault     =
364         {
365                 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,     // VkStructureType                            sType
366                 DE_NULL,                                                                                                        // const void*                                pNext
367                 0u,                                                                                                                     // VkPipelineRasterizationStateCreateFlags    flags
368                 VK_FALSE,                                                                                                       // VkBool32                                   depthClampEnable
369                 disableRasterization,                                                                           // VkBool32                                   rasterizerDiscardEnable
370                 VK_POLYGON_MODE_FILL,                                                                           // VkPolygonMode                              polygonMode
371                 VK_CULL_MODE_NONE,                                                                                      // VkCullModeFlags                            cullMode
372                 VK_FRONT_FACE_COUNTER_CLOCKWISE,                                                        // VkFrontFace                                frontFace
373                 VK_FALSE,                                                                                                       // VkBool32                                   depthBiasEnable
374                 0.0f,                                                                                                           // float                                      depthBiasConstantFactor
375                 0.0f,                                                                                                           // float                                      depthBiasClamp
376                 0.0f,                                                                                                           // float                                      depthBiasSlopeFactor
377                 1.0f                                                                                                            // float                                      lineWidth
378         };
379
380         const VkPipelineMultisampleStateCreateInfo              multisampleStateCreateInfoDefault       =
381         {
382                 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,       // VkStructureType                          sType
383                 DE_NULL,                                                                                                        // const void*                              pNext
384                 0u,                                                                                                                     // VkPipelineMultisampleStateCreateFlags    flags
385                 VK_SAMPLE_COUNT_1_BIT,                                                                          // VkSampleCountFlagBits                    rasterizationSamples
386                 VK_FALSE,                                                                                                       // VkBool32                                 sampleShadingEnable
387                 1.0f,                                                                                                           // float                                    minSampleShading
388                 DE_NULL,                                                                                                        // const VkSampleMask*                      pSampleMask
389                 VK_FALSE,                                                                                                       // VkBool32                                 alphaToCoverageEnable
390                 VK_FALSE                                                                                                        // VkBool32                                 alphaToOneEnable
391         };
392
393         const VkStencilOpState                                                  stencilOpState                                          =
394         {
395                 VK_STENCIL_OP_KEEP,             // VkStencilOp    failOp
396                 VK_STENCIL_OP_KEEP,             // VkStencilOp    passOp
397                 VK_STENCIL_OP_KEEP,             // VkStencilOp    depthFailOp
398                 VK_COMPARE_OP_NEVER,    // VkCompareOp    compareOp
399                 0,                                              // deUint32       compareMask
400                 0,                                              // deUint32       writeMask
401                 0                                               // deUint32       reference
402         };
403
404         const VkPipelineDepthStencilStateCreateInfo             depthStencilStateCreateInfoDefault      =
405         {
406                 VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,     // VkStructureType                          sType
407                 DE_NULL,                                                                                                        // const void*                              pNext
408                 0u,                                                                                                                     // VkPipelineDepthStencilStateCreateFlags   flags
409                 VK_FALSE,                                                                                                       // VkBool32                                 depthTestEnable
410                 VK_FALSE,                                                                                                       // VkBool32                                 depthWriteEnable
411                 VK_COMPARE_OP_LESS_OR_EQUAL,                                                            // VkCompareOp                              depthCompareOp
412                 VK_FALSE,                                                                                                       // VkBool32                                 depthBoundsTestEnable
413                 VK_FALSE,                                                                                                       // VkBool32                                 stencilTestEnable
414                 stencilOpState,                                                                                         // VkStencilOpState                         front
415                 stencilOpState,                                                                                         // VkStencilOpState                         back
416                 0.0f,                                                                                                           // float                                    minDepthBounds
417                 1.0f,                                                                                                           // float                                    maxDepthBounds
418         };
419
420         const VkPipelineColorBlendAttachmentState               colorBlendAttachmentState                       =
421         {
422                 VK_FALSE,                                       // VkBool32                 blendEnable
423                 VK_BLEND_FACTOR_ZERO,           // VkBlendFactor            srcColorBlendFactor
424                 VK_BLEND_FACTOR_ZERO,           // VkBlendFactor            dstColorBlendFactor
425                 VK_BLEND_OP_ADD,                        // VkBlendOp                colorBlendOp
426                 VK_BLEND_FACTOR_ZERO,           // VkBlendFactor            srcAlphaBlendFactor
427                 VK_BLEND_FACTOR_ZERO,           // VkBlendFactor            dstAlphaBlendFactor
428                 VK_BLEND_OP_ADD,                        // VkBlendOp                alphaBlendOp
429                 VK_COLOR_COMPONENT_R_BIT        // VkColorComponentFlags    colorWriteMask
430                 | VK_COLOR_COMPONENT_G_BIT
431                 | VK_COLOR_COMPONENT_B_BIT
432                 | VK_COLOR_COMPONENT_A_BIT
433         };
434
435         const VkPipelineColorBlendStateCreateInfo               colorBlendStateCreateInfoDefault        =
436         {
437                 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,       // VkStructureType                               sType
438                 DE_NULL,                                                                                                        // const void*                                   pNext
439                 0u,                                                                                                                     // VkPipelineColorBlendStateCreateFlags          flags
440                 VK_FALSE,                                                                                                       // VkBool32                                      logicOpEnable
441                 VK_LOGIC_OP_CLEAR,                                                                                      // VkLogicOp                                     logicOp
442                 1u,                                                                                                                     // deUint32                                      attachmentCount
443                 &colorBlendAttachmentState,                                                                     // const VkPipelineColorBlendAttachmentState*    pAttachments
444                 { 0.0f, 0.0f, 0.0f, 0.0f }                                                                      // float                                         blendConstants[4]
445         };
446
447         std::vector<VkDynamicState>                                             dynamicStates;
448
449         if (viewports.empty())
450                 dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
451         if (scissors.empty())
452                 dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
453
454         const VkPipelineDynamicStateCreateInfo                  dynamicStateCreateInfoDefault           =
455         {
456                 VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,   // VkStructureType                      sType
457                 DE_NULL,                                                                                                // const void*                          pNext
458                 0u,                                                                                                             // VkPipelineDynamicStateCreateFlags    flags
459                 (deUint32)dynamicStates.size(),                                                 // deUint32                             dynamicStateCount
460                 dynamicStates.empty() ? DE_NULL : &dynamicStates[0]             // const VkDynamicState*                pDynamicStates
461         };
462
463         const VkPipelineDynamicStateCreateInfo*                 dynamicStateCreateInfoDefaultPtr        = dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
464
465         const VkGraphicsPipelineCreateInfo                              pipelineCreateInfo                                      =
466         {
467                 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,                                                                                                                // VkStructureType                                  sType
468                 DE_NULL,                                                                                                                                                                                                // const void*                                      pNext
469                 0u,                                                                                                                                                                                                             // VkPipelineCreateFlags                            flags
470                 (deUint32)pipelineShaderStageParams.size(),                                                                                                                             // deUint32                                         stageCount
471                 &pipelineShaderStageParams[0],                                                                                                                                                  // const VkPipelineShaderStageCreateInfo*           pStages
472                 vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault,                   // const VkPipelineVertexInputStateCreateInfo*      pVertexInputState
473                 &inputAssemblyStateCreateInfo,                                                                                                                                                  // const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState
474                 hasTessellation ? &tessStateCreateInfo : DE_NULL,                                                                                                               // const VkPipelineTessellationStateCreateInfo*     pTessellationState
475                 &viewportStateCreateInfo,                                                                                                                                                               // const VkPipelineViewportStateCreateInfo*         pViewportState
476                 rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault,             // const VkPipelineRasterizationStateCreateInfo*    pRasterizationState
477                 multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault,                    // const VkPipelineMultisampleStateCreateInfo*      pMultisampleState
478                 depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault,                // const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState
479                 colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault,                              // const VkPipelineColorBlendStateCreateInfo*       pColorBlendState
480                 dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr,                                             // const VkPipelineDynamicStateCreateInfo*          pDynamicState
481                 pipelineLayout,                                                                                                                                                                                 // VkPipelineLayout                                 layout
482                 renderPass,                                                                                                                                                                                             // VkRenderPass                                     renderPass
483                 subpass,                                                                                                                                                                                                // deUint32                                         subpass
484                 DE_NULL,                                                                                                                                                                                                // VkPipeline                                       basePipelineHandle
485                 0                                                                                                                                                                                                               // deInt32                                          basePipelineIndex;
486         };
487
488         return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
489 }
490
491 Move<VkPipeline> makeGraphicsPipeline(Context&                                                                  context,
492                                                                           const VkPipelineLayout                                        pipelineLayout,
493                                                                           const VkShaderStageFlags                                      stages,
494                                                                           const VkShaderModule                                          vertexShaderModule,
495                                                                           const VkShaderModule                                          fragmentShaderModule,
496                                                                           const VkShaderModule                                          geometryShaderModule,
497                                                                           const VkShaderModule                                          tessellationControlModule,
498                                                                           const VkShaderModule                                          tessellationEvaluationModule,
499                                                                           const VkRenderPass                                            renderPass,
500                                                                           const VkPrimitiveTopology                                     topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
501                                                                           const VkVertexInputBindingDescription*        vertexInputBindingDescription = DE_NULL,
502                                                                           const VkVertexInputAttributeDescription*      vertexInputAttributeDescriptions = DE_NULL,
503                                                                           const bool                                                            frameBufferTests = false,
504                                                                           const vk::VkFormat                                            attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
505                                                                           const deUint32                                                        vertexShaderStageCreateFlags = 0u,
506                                                                           const deUint32                                                        tessellationControlShaderStageCreateFlags = 0u,
507                                                                           const deUint32                                                        tessellationEvalShaderStageCreateFlags = 0u,
508                                                                           const deUint32                                                        geometryShaderStageCreateFlags = 0u,
509                                                                           const deUint32                                                        fragmentShaderStageCreateFlags = 0u,
510                                                                           const deUint32                                                        requiredSubgroupSize[5] = DE_NULL)
511 {
512         std::vector<VkViewport> noViewports;
513         std::vector<VkRect2D>   noScissors;
514
515         const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
516         {
517                 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,      // VkStructureType                                                              sType;
518                 DE_NULL,                                                                                                        // const void*                                                                  pNext;
519                 0u,                                                                                                                     // VkPipelineVertexInputStateCreateFlags                flags;
520                 vertexInputBindingDescription == DE_NULL ? 0u : 1u,                     // deUint32                                                                             vertexBindingDescriptionCount;
521                 vertexInputBindingDescription,                                                          // const VkVertexInputBindingDescription*               pVertexBindingDescriptions;
522                 vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,          // deUint32                                                                             vertexAttributeDescriptionCount;
523                 vertexInputAttributeDescriptions,                                                       // const VkVertexInputAttributeDescription*             pVertexAttributeDescriptions;
524         };
525
526         const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
527         const VkColorComponentFlags colorComponent =
528                                                                                                 numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
529                                                                                                 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
530                                                                                                 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
531                                                                                                 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
532
533         const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
534         {
535                 VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
536                 VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
537                 colorComponent
538         };
539
540         const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
541         {
542                 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
543                 VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
544                 { 0.0f, 0.0f, 0.0f, 0.0f }
545         };
546
547         const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
548
549         return makeGraphicsPipeline(context.getDeviceInterface(),       // const DeviceInterface&                        vk
550                                                                 context.getDevice(),                    // const VkDevice                                device
551                                                                 pipelineLayout,                                 // const VkPipelineLayout                        pipelineLayout
552                                                                 vertexShaderModule,                             // const VkShaderModule                          vertexShaderModule
553                                                                 tessellationControlModule,              // const VkShaderModule                          tessellationControlShaderModule
554                                                                 tessellationEvaluationModule,   // const VkShaderModule                          tessellationEvalShaderModule
555                                                                 geometryShaderModule,                   // const VkShaderModule                          geometryShaderModule
556                                                                 fragmentShaderModule,                   // const VkShaderModule                          fragmentShaderModule
557                                                                 renderPass,                                             // const VkRenderPass                            renderPass
558                                                                 noViewports,                                    // const std::vector<VkViewport>&                viewports
559                                                                 noScissors,                                             // const std::vector<VkRect2D>&                  scissors
560                                                                 topology,                                               // const VkPrimitiveTopology                     topology
561                                                                 0u,                                                             // const deUint32                                subpass
562                                                                 patchControlPoints,                             // const deUint32                                patchControlPoints
563                                                                 &vertexInputStateCreateInfo,    // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
564                                                                 DE_NULL,                                                // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
565                                                                 DE_NULL,                                                // const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
566                                                                 DE_NULL,                                                // const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
567                                                                 &colorBlendStateCreateInfo,             // const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
568                                                                 DE_NULL,                                                // const VkPipelineDynamicStateCreateInfo*
569                                                                 vertexShaderStageCreateFlags,   // const deUint32                                                                vertexShaderStageCreateFlags,
570                                                                 tessellationControlShaderStageCreateFlags,      // const deUint32                                        tessellationControlShaderStageCreateFlags
571                                                                 tessellationEvalShaderStageCreateFlags,         // const deUint32                                        tessellationEvalShaderStageCreateFlags
572                                                                 geometryShaderStageCreateFlags, // const deUint32                                                                geometryShaderStageCreateFlags
573                                                                 fragmentShaderStageCreateFlags, // const deUint32                                                                fragmentShaderStageCreateFlags
574                                                                 requiredSubgroupSize);                  // const deUint32                                                                requiredSubgroupSize[5]
575 }
576
577 Move<VkCommandBuffer> makeCommandBuffer(
578         Context& context, const VkCommandPool commandPool)
579 {
580         const VkCommandBufferAllocateInfo bufferAllocateParams =
581         {
582                 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType              sType;
583                 DE_NULL,                                                                                // const void*                  pNext;
584                 commandPool,                                                                    // VkCommandPool                commandPool;
585                 VK_COMMAND_BUFFER_LEVEL_PRIMARY,                                // VkCommandBufferLevel level;
586                 1u,                                                                                             // deUint32                             bufferCount;
587         };
588         return allocateCommandBuffer(context.getDeviceInterface(),
589                                                                  context.getDevice(), &bufferAllocateParams);
590 }
591
592 struct Buffer;
593 struct Image;
594
595 struct BufferOrImage
596 {
597         bool isImage() const
598         {
599                 return m_isImage;
600         }
601
602         Buffer* getAsBuffer()
603         {
604                 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
605                 return reinterpret_cast<Buffer* >(this);
606         }
607
608         Image* getAsImage()
609         {
610                 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
611                 return reinterpret_cast<Image*>(this);
612         }
613
614         virtual VkDescriptorType getType() const
615         {
616                 if (m_isImage)
617                 {
618                         return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
619                 }
620                 else
621                 {
622                         return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
623                 }
624         }
625
626         Allocation& getAllocation() const
627         {
628                 return *m_allocation;
629         }
630
631         virtual ~BufferOrImage() {}
632
633 protected:
634         explicit BufferOrImage(bool image) : m_isImage(image) {}
635
636         bool m_isImage;
637         de::details::MovePtr<Allocation> m_allocation;
638 };
639
640 struct Buffer : public BufferOrImage
641 {
642         explicit Buffer(
643                 Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
644                 : BufferOrImage         (false)
645                 , m_sizeInBytes         (sizeInBytes)
646                 , m_usage                       (usage)
647         {
648                 const DeviceInterface&                  vkd                                     = context.getDeviceInterface();
649                 const VkDevice                                  device                          = context.getDevice();
650
651                 const vk::VkBufferCreateInfo    bufferCreateInfo        =
652                 {
653                         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
654                         DE_NULL,
655                         0u,
656                         m_sizeInBytes,
657                         m_usage,
658                         VK_SHARING_MODE_EXCLUSIVE,
659                         0u,
660                         DE_NULL,
661                 };
662                 m_buffer                = createBuffer(vkd, device, &bufferCreateInfo);
663
664                 VkMemoryRequirements                    req                                     = getBufferMemoryRequirements(vkd, device, *m_buffer);
665
666                 m_allocation    = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
667                 VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
668         }
669
670         virtual VkDescriptorType getType() const
671         {
672                 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
673                 {
674                         return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
675                 }
676                 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
677         }
678
679         VkBuffer getBuffer () const
680         {
681                 return *m_buffer;
682         }
683
684         const VkBuffer* getBufferPtr () const
685         {
686                 return &(*m_buffer);
687         }
688
689         VkDeviceSize getSize () const
690         {
691                 return m_sizeInBytes;
692         }
693
694 private:
695         Move<VkBuffer>                          m_buffer;
696         VkDeviceSize                            m_sizeInBytes;
697         const VkBufferUsageFlags        m_usage;
698 };
699
700 struct Image : public BufferOrImage
701 {
702         explicit Image(Context& context, deUint32 width, deUint32 height,
703                                    VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
704                 : BufferOrImage(true)
705         {
706                 const DeviceInterface&                  vk                                      = context.getDeviceInterface();
707                 const VkDevice                                  device                          = context.getDevice();
708                 const deUint32                                  queueFamilyIndex        = context.getUniversalQueueFamilyIndex();
709
710                 const VkImageCreateInfo                 imageCreateInfo         =
711                 {
712                         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
713                         format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
714                         VK_IMAGE_TILING_OPTIMAL, usage,
715                         VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
716                         VK_IMAGE_LAYOUT_UNDEFINED
717                 };
718
719                 const VkComponentMapping                componentMapping        =
720                 {
721                         VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
722                         VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
723                 };
724
725                 const VkImageSubresourceRange   subresourceRange        =
726                 {
727                         VK_IMAGE_ASPECT_COLOR_BIT,      //VkImageAspectFlags    aspectMask
728                         0u,                                                     //deUint32                              baseMipLevel
729                         1u,                                                     //deUint32                              levelCount
730                         0u,                                                     //deUint32                              baseArrayLayer
731                         1u                                                      //deUint32                              layerCount
732                 };
733
734                 const VkSamplerCreateInfo               samplerCreateInfo       =
735                 {
736                         VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
737                         DE_NULL,
738                         0u,
739                         VK_FILTER_NEAREST,
740                         VK_FILTER_NEAREST,
741                         VK_SAMPLER_MIPMAP_MODE_NEAREST,
742                         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
743                         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
744                         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
745                         0.0f,
746                         VK_FALSE,
747                         1.0f,
748                         DE_FALSE,
749                         VK_COMPARE_OP_ALWAYS,
750                         0.0f,
751                         0.0f,
752                         VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
753                         VK_FALSE,
754                 };
755
756                 m_image                 = createImage(vk, device, &imageCreateInfo);
757
758                 VkMemoryRequirements                    req                                     = getImageMemoryRequirements(vk, device, *m_image);
759
760                 req.size                *= 2;
761                 m_allocation    = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
762
763                 VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
764
765                 const VkImageViewCreateInfo             imageViewCreateInfo     =
766                 {
767                         VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
768                         VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
769                         subresourceRange
770                 };
771
772                 m_imageView             = createImageView(vk, device, &imageViewCreateInfo);
773                 m_sampler               = createSampler(vk, device, &samplerCreateInfo);
774
775                 // Transition input image layouts
776                 {
777                         const Unique<VkCommandPool>             cmdPool                 (makeCommandPool(vk, device, queueFamilyIndex));
778                         const Unique<VkCommandBuffer>   cmdBuffer               (makeCommandBuffer(context, *cmdPool));
779
780                         beginCommandBuffer(vk, *cmdBuffer);
781
782                         const VkImageMemoryBarrier              imageBarrier    = makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
783                                                                                                                                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
784
785                         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
786                                 (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);
787
788                         endCommandBuffer(vk, *cmdBuffer);
789                         submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
790                 }
791         }
792
793         VkImage getImage () const
794         {
795                 return *m_image;
796         }
797
798         VkImageView getImageView () const
799         {
800                 return *m_imageView;
801         }
802
803         VkSampler getSampler () const
804         {
805                 return *m_sampler;
806         }
807
808 private:
809         Move<VkImage> m_image;
810         Move<VkImageView> m_imageView;
811         Move<VkSampler> m_sampler;
812 };
813 }
814
815 std::string vkt::subgroups::getSharedMemoryBallotHelper()
816 {
817         return  "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
818                         "uvec4 sharedMemoryBallot(bool vote)\n"
819                         "{\n"
820                         "  uint groupOffset = gl_SubgroupID;\n"
821                         "  // One invocation in the group 0's the whole group's data\n"
822                         "  if (subgroupElect())\n"
823                         "  {\n"
824                         "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
825                         "  }\n"
826                         "  subgroupMemoryBarrierShared();\n"
827                         "  if (vote)\n"
828                         "  {\n"
829                         "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
830                         "    const highp uint bitToSet = 1u << invocationId;\n"
831                         "    switch (gl_SubgroupInvocationID / 32)\n"
832                         "    {\n"
833                         "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
834                         "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
835                         "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
836                         "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
837                         "    }\n"
838                         "  }\n"
839                         "  subgroupMemoryBarrierShared();\n"
840                         "  return superSecretComputeShaderHelper[groupOffset];\n"
841                         "}\n";
842 }
843
844 std::string vkt::subgroups::getSharedMemoryBallotHelperARB()
845 {
846         return  "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
847                         "uint64_t sharedMemoryBallot(bool vote)\n"
848                         "{\n"
849                         "  uint groupOffset = gl_SubgroupID;\n"
850                         "  // One invocation in the group 0's the whole group's data\n"
851                         "  if (subgroupElect())\n"
852                         "  {\n"
853                         "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
854                         "  }\n"
855                         "  subgroupMemoryBarrierShared();\n"
856                         "  if (vote)\n"
857                         "  {\n"
858                         "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
859                         "    const highp uint bitToSet = 1u << invocationId;\n"
860                         "    switch (gl_SubgroupInvocationID / 32)\n"
861                         "    {\n"
862                         "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
863                         "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
864                         "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
865                         "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
866                         "    }\n"
867                         "  }\n"
868                         "  subgroupMemoryBarrierShared();\n"
869                         "  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
870                         "}\n";
871 }
872
873 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
874 {
875         VkPhysicalDeviceSubgroupProperties subgroupProperties;
876         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
877         subgroupProperties.pNext = DE_NULL;
878
879         VkPhysicalDeviceProperties2 properties;
880         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
881         properties.pNext = &subgroupProperties;
882
883         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
884
885         return subgroupProperties.subgroupSize;
886 }
887
888 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
889         return 128u;
890 }
891
892 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
893 {
894         switch (stage)
895         {
896                 default:
897                         DE_FATAL("Unhandled stage!");
898                         return "";
899                 case VK_SHADER_STAGE_COMPUTE_BIT:
900                         return "compute";
901                 case VK_SHADER_STAGE_FRAGMENT_BIT:
902                         return "fragment";
903                 case VK_SHADER_STAGE_VERTEX_BIT:
904                         return "vertex";
905                 case VK_SHADER_STAGE_GEOMETRY_BIT:
906                         return "geometry";
907                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
908                         return "tess_control";
909                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
910                         return "tess_eval";
911         }
912 }
913
914 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
915 {
916         switch (bit)
917         {
918                 default:
919                         DE_FATAL("Unknown subgroup feature category!");
920                         return "";
921                 case VK_SUBGROUP_FEATURE_BASIC_BIT:
922                         return "VK_SUBGROUP_FEATURE_BASIC_BIT";
923                 case VK_SUBGROUP_FEATURE_VOTE_BIT:
924                         return "VK_SUBGROUP_FEATURE_VOTE_BIT";
925                 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
926                         return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
927                 case VK_SUBGROUP_FEATURE_BALLOT_BIT:
928                         return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
929                 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
930                         return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
931                 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
932                         return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
933                 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
934                         return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
935                 case VK_SUBGROUP_FEATURE_QUAD_BIT:
936                         return "VK_SUBGROUP_FEATURE_QUAD_BIT";
937         }
938 }
939
940 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
941 {
942         {
943         /*
944                 "#version 450\n"
945                 "void main (void)\n"
946                 "{\n"
947                 "  float pixelSize = 2.0f/1024.0f;\n"
948                 "   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
949                 "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
950                 "  gl_PointSize = 1.0f;\n"
951                 "}\n"
952         */
953                 const std::string vertNoSubgroup =
954                         "; SPIR-V\n"
955                         "; Version: 1.3\n"
956                         "; Generator: Khronos Glslang Reference Front End; 1\n"
957                         "; Bound: 37\n"
958                         "; Schema: 0\n"
959                         "OpCapability Shader\n"
960                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
961                         "OpMemoryModel Logical GLSL450\n"
962                         "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
963                         "OpMemberDecorate %20 0 BuiltIn Position\n"
964                         "OpMemberDecorate %20 1 BuiltIn PointSize\n"
965                         "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
966                         "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
967                         "OpDecorate %20 Block\n"
968                         "OpDecorate %26 BuiltIn VertexIndex\n"
969                         "%2 = OpTypeVoid\n"
970                         "%3 = OpTypeFunction %2\n"
971                         "%6 = OpTypeFloat 32\n"
972                         "%7 = OpTypePointer Function %6\n"
973                         "%9 = OpConstant %6 0.00195313\n"
974                         "%12 = OpConstant %6 2\n"
975                         "%14 = OpConstant %6 1\n"
976                         "%16 = OpTypeVector %6 4\n"
977                         "%17 = OpTypeInt 32 0\n"
978                         "%18 = OpConstant %17 1\n"
979                         "%19 = OpTypeArray %6 %18\n"
980                         "%20 = OpTypeStruct %16 %6 %19 %19\n"
981                         "%21 = OpTypePointer Output %20\n"
982                         "%22 = OpVariable %21 Output\n"
983                         "%23 = OpTypeInt 32 1\n"
984                         "%24 = OpConstant %23 0\n"
985                         "%25 = OpTypePointer Input %23\n"
986                         "%26 = OpVariable %25 Input\n"
987                         "%33 = OpConstant %6 0\n"
988                         "%35 = OpTypePointer Output %16\n"
989                         "%37 = OpConstant %23 1\n"
990                         "%38 = OpTypePointer Output %6\n"
991                         "%4 = OpFunction %2 None %3\n"
992                         "%5 = OpLabel\n"
993                         "%8 = OpVariable %7 Function\n"
994                         "%10 = OpVariable %7 Function\n"
995                         "OpStore %8 %9\n"
996                         "%11 = OpLoad %6 %8\n"
997                         "%13 = OpFDiv %6 %11 %12\n"
998                         "%15 = OpFSub %6 %13 %14\n"
999                         "OpStore %10 %15\n"
1000                         "%27 = OpLoad %23 %26\n"
1001                         "%28 = OpConvertSToF %6 %27\n"
1002                         "%29 = OpLoad %6 %8\n"
1003                         "%30 = OpFMul %6 %28 %29\n"
1004                         "%31 = OpLoad %6 %10\n"
1005                         "%32 = OpFAdd %6 %30 %31\n"
1006                         "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
1007                         "%36 = OpAccessChain %35 %22 %24\n"
1008                         "OpStore %36 %34\n"
1009                         "%39 = OpAccessChain %38 %22 %37\n"
1010                         "OpStore %39 %14\n"
1011                         "OpReturn\n"
1012                         "OpFunctionEnd\n";
1013                 programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
1014         }
1015
1016         {
1017         /*
1018                 "#version 450\n"
1019                 "layout(vertices=1) out;\n"
1020                 "\n"
1021                 "void main (void)\n"
1022                 "{\n"
1023                 "  if (gl_InvocationID == 0)\n"
1024                 "  {\n"
1025                 "    gl_TessLevelOuter[0] = 1.0f;\n"
1026                 "    gl_TessLevelOuter[1] = 1.0f;\n"
1027                 "  }\n"
1028                 "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1029                 "}\n"
1030         */
1031                 const std::string tescNoSubgroup =
1032                         "; SPIR-V\n"
1033                         "; Version: 1.3\n"
1034                         "; Generator: Khronos Glslang Reference Front End; 1\n"
1035                         "; Bound: 45\n"
1036                         "; Schema: 0\n"
1037                         "OpCapability Tessellation\n"
1038                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1039                         "OpMemoryModel Logical GLSL450\n"
1040                         "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
1041                         "OpExecutionMode %4 OutputVertices 1\n"
1042                         "OpDecorate %8 BuiltIn InvocationId\n"
1043                         "OpDecorate %20 Patch\n"
1044                         "OpDecorate %20 BuiltIn TessLevelOuter\n"
1045                         "OpMemberDecorate %29 0 BuiltIn Position\n"
1046                         "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1047                         "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1048                         "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1049                         "OpDecorate %29 Block\n"
1050                         "OpMemberDecorate %34 0 BuiltIn Position\n"
1051                         "OpMemberDecorate %34 1 BuiltIn PointSize\n"
1052                         "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
1053                         "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
1054                         "OpDecorate %34 Block\n"
1055                         "%2 = OpTypeVoid\n"
1056                         "%3 = OpTypeFunction %2\n"
1057                         "%6 = OpTypeInt 32 1\n"
1058                         "%7 = OpTypePointer Input %6\n"
1059                         "%8 = OpVariable %7 Input\n"
1060                         "%10 = OpConstant %6 0\n"
1061                         "%11 = OpTypeBool\n"
1062                         "%15 = OpTypeFloat 32\n"
1063                         "%16 = OpTypeInt 32 0\n"
1064                         "%17 = OpConstant %16 4\n"
1065                         "%18 = OpTypeArray %15 %17\n"
1066                         "%19 = OpTypePointer Output %18\n"
1067                         "%20 = OpVariable %19 Output\n"
1068                         "%21 = OpConstant %15 1\n"
1069                         "%22 = OpTypePointer Output %15\n"
1070                         "%24 = OpConstant %6 1\n"
1071                         "%26 = OpTypeVector %15 4\n"
1072                         "%27 = OpConstant %16 1\n"
1073                         "%28 = OpTypeArray %15 %27\n"
1074                         "%29 = OpTypeStruct %26 %15 %28 %28\n"
1075                         "%30 = OpTypeArray %29 %27\n"
1076                         "%31 = OpTypePointer Output %30\n"
1077                         "%32 = OpVariable %31 Output\n"
1078                         "%34 = OpTypeStruct %26 %15 %28 %28\n"
1079                         "%35 = OpConstant %16 32\n"
1080                         "%36 = OpTypeArray %34 %35\n"
1081                         "%37 = OpTypePointer Input %36\n"
1082                         "%38 = OpVariable %37 Input\n"
1083                         "%40 = OpTypePointer Input %26\n"
1084                         "%43 = OpTypePointer Output %26\n"
1085                         "%4 = OpFunction %2 None %3\n"
1086                         "%5 = OpLabel\n"
1087                         "%9 = OpLoad %6 %8\n"
1088                         "%12 = OpIEqual %11 %9 %10\n"
1089                         "OpSelectionMerge %14 None\n"
1090                         "OpBranchConditional %12 %13 %14\n"
1091                         "%13 = OpLabel\n"
1092                         "%23 = OpAccessChain %22 %20 %10\n"
1093                         "OpStore %23 %21\n"
1094                         "%25 = OpAccessChain %22 %20 %24\n"
1095                         "OpStore %25 %21\n"
1096                         "OpBranch %14\n"
1097                         "%14 = OpLabel\n"
1098                         "%33 = OpLoad %6 %8\n"
1099                         "%39 = OpLoad %6 %8\n"
1100                         "%41 = OpAccessChain %40 %38 %39 %10\n"
1101                         "%42 = OpLoad %26 %41\n"
1102                         "%44 = OpAccessChain %43 %32 %33 %10\n"
1103                         "OpStore %44 %42\n"
1104                         "OpReturn\n"
1105                         "OpFunctionEnd\n";
1106                 programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
1107         }
1108
1109         {
1110         /*
1111                 "#version 450\n"
1112                 "layout(isolines) in;\n"
1113                 "\n"
1114                 "void main (void)\n"
1115                 "{\n"
1116                 "  float pixelSize = 2.0f/1024.0f;\n"
1117                 "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1118                 "}\n";
1119         */
1120                 const std::string teseNoSubgroup =
1121                         "; SPIR-V\n"
1122                         "; Version: 1.3\n"
1123                         "; Generator: Khronos Glslang Reference Front End; 2\n"
1124                         "; Bound: 42\n"
1125                         "; Schema: 0\n"
1126                         "OpCapability Tessellation\n"
1127                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1128                         "OpMemoryModel Logical GLSL450\n"
1129                         "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
1130                         "OpExecutionMode %4 Isolines\n"
1131                         "OpExecutionMode %4 SpacingEqual\n"
1132                         "OpExecutionMode %4 VertexOrderCcw\n"
1133                         "OpMemberDecorate %14 0 BuiltIn Position\n"
1134                         "OpMemberDecorate %14 1 BuiltIn PointSize\n"
1135                         "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
1136                         "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
1137                         "OpDecorate %14 Block\n"
1138                         "OpMemberDecorate %19 0 BuiltIn Position\n"
1139                         "OpMemberDecorate %19 1 BuiltIn PointSize\n"
1140                         "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
1141                         "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
1142                         "OpDecorate %19 Block\n"
1143                         "OpDecorate %29 BuiltIn TessCoord\n"
1144                         "%2 = OpTypeVoid\n"
1145                         "%3 = OpTypeFunction %2\n"
1146                         "%6 = OpTypeFloat 32\n"
1147                         "%7 = OpTypePointer Function %6\n"
1148                         "%9 = OpConstant %6 0.00195313\n"
1149                         "%10 = OpTypeVector %6 4\n"
1150                         "%11 = OpTypeInt 32 0\n"
1151                         "%12 = OpConstant %11 1\n"
1152                         "%13 = OpTypeArray %6 %12\n"
1153                         "%14 = OpTypeStruct %10 %6 %13 %13\n"
1154                         "%15 = OpTypePointer Output %14\n"
1155                         "%16 = OpVariable %15 Output\n"
1156                         "%17 = OpTypeInt 32 1\n"
1157                         "%18 = OpConstant %17 0\n"
1158                         "%19 = OpTypeStruct %10 %6 %13 %13\n"
1159                         "%20 = OpConstant %11 32\n"
1160                         "%21 = OpTypeArray %19 %20\n"
1161                         "%22 = OpTypePointer Input %21\n"
1162                         "%23 = OpVariable %22 Input\n"
1163                         "%24 = OpTypePointer Input %10\n"
1164                         "%27 = OpTypeVector %6 3\n"
1165                         "%28 = OpTypePointer Input %27\n"
1166                         "%29 = OpVariable %28 Input\n"
1167                         "%30 = OpConstant %11 0\n"
1168                         "%31 = OpTypePointer Input %6\n"
1169                         "%36 = OpConstant %6 2\n"
1170                         "%40 = OpTypePointer Output %10\n"
1171                         "%4 = OpFunction %2 None %3\n"
1172                         "%5 = OpLabel\n"
1173                         "%8 = OpVariable %7 Function\n"
1174                         "OpStore %8 %9\n"
1175                         "%25 = OpAccessChain %24 %23 %18 %18\n"
1176                         "%26 = OpLoad %10 %25\n"
1177                         "%32 = OpAccessChain %31 %29 %30\n"
1178                         "%33 = OpLoad %6 %32\n"
1179                         "%34 = OpLoad %6 %8\n"
1180                         "%35 = OpFMul %6 %33 %34\n"
1181                         "%37 = OpFDiv %6 %35 %36\n"
1182                         "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
1183                         "%39 = OpFAdd %10 %26 %38\n"
1184                         "%41 = OpAccessChain %40 %16 %18\n"
1185                         "OpStore %41 %39\n"
1186                         "OpReturn\n"
1187                         "OpFunctionEnd\n";
1188                 programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
1189         }
1190
1191 }
1192
1193
1194 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
1195 {
1196         switch (stage)
1197         {
1198                 default:
1199                         DE_FATAL("Unhandled stage!");
1200                         return "";
1201                 case VK_SHADER_STAGE_FRAGMENT_BIT:
1202                         return
1203                                 "#version 450\n"
1204                                 "void main (void)\n"
1205                                 "{\n"
1206                                 "  float pixelSize = 2.0f/1024.0f;\n"
1207                                 "   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1208                                 "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1209                                 "}\n";
1210                 case VK_SHADER_STAGE_GEOMETRY_BIT:
1211                         return
1212                                 "#version 450\n"
1213                                 "void main (void)\n"
1214                                 "{\n"
1215                                 "}\n";
1216                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1217                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1218                         return
1219                                 "#version 450\n"
1220                                 "void main (void)\n"
1221                                 "{\n"
1222                                 "}\n";
1223         }
1224 }
1225
1226 void vkt::subgroups::initStdFrameBufferPrograms(        SourceCollections&                              programCollection,
1227                                                                                                         const vk::ShaderBuildOptions&   buildOptions,
1228                                                                                                         VkShaderStageFlags                              shaderStage,
1229                                                                                                         VkFormat                                                format,
1230                                                                                                         bool                                                    gsPointSize,
1231                                                                                                         std::string                                             extHeader,
1232                                                                                                         std::string                                             testSrc,
1233                                                                                                         std::string                                             helperStr)
1234 {
1235         subgroups::setFragmentShaderFrameBuffer(programCollection);
1236
1237         if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1238                 subgroups::setVertexShaderFrameBuffer(programCollection);
1239
1240         if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1241         {
1242                 std::ostringstream vertex;
1243                 vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1244                         << extHeader.c_str()
1245                         << "layout(location = 0) in highp vec4 in_position;\n"
1246                         << "layout(location = 0) out float result;\n"
1247                         << "layout(set = 0, binding = 0) uniform Buffer1\n"
1248                         << "{\n"
1249                         << "  " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1250                         << "};\n"
1251                         << "\n"
1252                         << helperStr.c_str()
1253                         << "void main (void)\n"
1254                         << "{\n"
1255                         << "  uint tempRes;\n"
1256                         << testSrc
1257                         << "  result = float(tempRes);\n"
1258                         << "  gl_Position = in_position;\n"
1259                         << "  gl_PointSize = 1.0f;\n"
1260                         << "}\n";
1261                 programCollection.glslSources.add("vert")
1262                         << glu::VertexSource(vertex.str()) << buildOptions;
1263         }
1264         else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1265         {
1266                 std::ostringstream geometry;
1267
1268                 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1269                         << extHeader.c_str()
1270                         << "layout(points) in;\n"
1271                         << "layout(points, max_vertices = 1) out;\n"
1272                         << "layout(location = 0) out float out_color;\n"
1273                         << "layout(set = 0, binding = 0) uniform Buffer1\n"
1274                         << "{\n"
1275                         << "  " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1276                         << "};\n"
1277                         << "\n"
1278                         << helperStr.c_str()
1279                         << "void main (void)\n"
1280                         << "{\n"
1281                         << "  uint tempRes;\n"
1282                         << testSrc
1283                         << "  out_color = float(tempRes);\n"
1284                         << "  gl_Position = gl_in[0].gl_Position;\n"
1285                         << (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1286                         << "  EmitVertex();\n"
1287                         << "  EndPrimitive();\n"
1288                         << "}\n";
1289
1290                 programCollection.glslSources.add("geometry")
1291                         << glu::GeometrySource(geometry.str()) << buildOptions;
1292         }
1293         else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1294         {
1295                 std::ostringstream controlSource;
1296                 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1297                         << extHeader.c_str()
1298                         << "layout(vertices = 2) out;\n"
1299                         << "layout(location = 0) out float out_color[];\n"
1300                         << "layout(set = 0, binding = 0) uniform Buffer1\n"
1301                         << "{\n"
1302                         << "  " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1303                         << "};\n"
1304                         << "\n"
1305                         << helperStr.c_str()
1306                         << "void main (void)\n"
1307                         << "{\n"
1308                         << "  if (gl_InvocationID == 0)\n"
1309                         << "  {\n"
1310                         << "    gl_TessLevelOuter[0] = 1.0f;\n"
1311                         << "    gl_TessLevelOuter[1] = 1.0f;\n"
1312                         << "  }\n"
1313                         << "  uint tempRes;\n"
1314                         << testSrc
1315                         << "  out_color[gl_InvocationID] = float(tempRes);\n"
1316                         << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1317                         << "}\n";
1318
1319                 programCollection.glslSources.add("tesc")
1320                         << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1321                 subgroups::setTesEvalShaderFrameBuffer(programCollection);
1322         }
1323         else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1324         {
1325                 ostringstream evaluationSource;
1326                 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1327                         << extHeader.c_str()
1328                         << "layout(isolines, equal_spacing, ccw ) in;\n"
1329                         << "layout(location = 0) out float out_color;\n"
1330                         << "layout(set = 0, binding = 0) uniform Buffer1\n"
1331                         << "{\n"
1332                         << "  " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1333                         << "};\n"
1334                         << "\n"
1335                         << helperStr.c_str()
1336                         << "void main (void)\n"
1337                         << "{\n"
1338                         << "  uint tempRes;\n"
1339                         << testSrc
1340                         << "  out_color = float(tempRes);\n"
1341                         << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1342                         << "}\n";
1343
1344                 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1345                 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1346         }
1347         else
1348         {
1349                 DE_FATAL("Unsupported shader stage");
1350         }
1351 }
1352
1353 void vkt::subgroups::initStdPrograms(   vk::SourceCollections&                  programCollection,
1354                                                                                 const vk::ShaderBuildOptions&   buildOptions,
1355                                                                                 vk::VkShaderStageFlags                  shaderStage,
1356                                                                                 vk::VkFormat                                    format,
1357                                                                                 std::string                                             extHeader,
1358                                                                                 std::string                                             testSrc,
1359                                                                                 std::string                                             helperStr)
1360 {
1361         if (shaderStage == VK_SHADER_STAGE_COMPUTE_BIT)
1362         {
1363                 std::ostringstream src;
1364
1365                 src << "#version 450\n"
1366                         << extHeader.c_str()
1367                         << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1368                         "local_size_z_id = 2) in;\n"
1369                         << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1370                         << "{\n"
1371                         << "  uint result[];\n"
1372                         << "};\n"
1373                         << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
1374                         << "{\n"
1375                         << "  " << subgroups::getFormatNameForGLSL(format) << " data[];\n"
1376                         << "};\n"
1377                         << "\n"
1378                         << helperStr.c_str()
1379                         << "void main (void)\n"
1380                         << "{\n"
1381                         << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1382                         << "  highp uint offset = globalSize.x * ((globalSize.y * "
1383                         "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1384                         "gl_GlobalInvocationID.x;\n"
1385                         << "  uint tempRes;\n"
1386                         << testSrc
1387                         << "  result[offset] = tempRes;\n"
1388                         << "}\n";
1389
1390                 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1391         }
1392         else
1393         {
1394                 const string vertex =
1395                         "#version 450\n"
1396                         + extHeader +
1397                         "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1398                         "{\n"
1399                         "  uint result[];\n"
1400                         "};\n"
1401                         "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1402                         "{\n"
1403                         "  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1404                         "};\n"
1405                         "\n"
1406                         + helperStr +
1407                         "void main (void)\n"
1408                         "{\n"
1409                         "  uint tempRes;\n"
1410                         + testSrc +
1411                         "  result[gl_VertexIndex] = tempRes;\n"
1412                         "  float pixelSize = 2.0f/1024.0f;\n"
1413                         "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1414                         "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1415                         "  gl_PointSize = 1.0f;\n"
1416                         "}\n";
1417
1418                 const string tesc =
1419                         "#version 450\n"
1420                         + extHeader +
1421                         "layout(vertices=1) out;\n"
1422                         "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
1423                         "{\n"
1424                         "  uint result[];\n"
1425                         "};\n"
1426                         "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1427                         "{\n"
1428                         "  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1429                         "};\n"
1430                         "\n"
1431                         + helperStr +
1432                         "void main (void)\n"
1433                         "{\n"
1434                         "  uint tempRes;\n"
1435                         + testSrc +
1436                         "  result[gl_PrimitiveID] = tempRes;\n"
1437                         "  if (gl_InvocationID == 0)\n"
1438                         "  {\n"
1439                         "    gl_TessLevelOuter[0] = 1.0f;\n"
1440                         "    gl_TessLevelOuter[1] = 1.0f;\n"
1441                         "  }\n"
1442                         "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1443                         "}\n";
1444
1445                 const string tese =
1446                         "#version 450\n"
1447                         + extHeader +
1448                         "layout(isolines) in;\n"
1449                         "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
1450                         "{\n"
1451                         "  uint result[];\n"
1452                         "};\n"
1453                         "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1454                         "{\n"
1455                         "  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1456                         "};\n"
1457                         "\n"
1458                         + helperStr +
1459                         "void main (void)\n"
1460                         "{\n"
1461                         "  uint tempRes;\n"
1462                         + testSrc +
1463                         "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1464                         "  float pixelSize = 2.0f/1024.0f;\n"
1465                         "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1466                         "}\n";
1467
1468                 const string geometry =
1469                         "#version 450\n"
1470                         + extHeader +
1471                         "layout(${TOPOLOGY}) in;\n"
1472                         "layout(points, max_vertices = 1) out;\n"
1473                         "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
1474                         "{\n"
1475                         "  uint result[];\n"
1476                         "};\n"
1477                         "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1478                         "{\n"
1479                         "  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1480                         "};\n"
1481                         "\n"
1482                         + helperStr +
1483                         "void main (void)\n"
1484                         "{\n"
1485                         "  uint tempRes;\n"
1486                         + testSrc +
1487                         "  result[gl_PrimitiveIDIn] = tempRes;\n"
1488                         "  gl_Position = gl_in[0].gl_Position;\n"
1489                         "  EmitVertex();\n"
1490                         "  EndPrimitive();\n"
1491                         "}\n";
1492
1493                 const string fragment =
1494                         "#version 450\n"
1495                         + extHeader +
1496                         "layout(location = 0) out uint result;\n"
1497                         "layout(set = 0, binding = 4, std430) readonly buffer Buffer1\n"
1498                         "{\n"
1499                         "  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1500                         "};\n"
1501                         + helperStr +
1502                         "void main (void)\n"
1503                         "{\n"
1504                         "  uint tempRes;\n"
1505                         + testSrc +
1506                         "  result = tempRes;\n"
1507                         "}\n";
1508
1509                 subgroups::addNoSubgroupShader(programCollection);
1510
1511                 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1512                 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1513                 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1514                 subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1515                 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
1516         }
1517 }
1518
1519 bool vkt::subgroups::isSubgroupSupported(Context& context)
1520 {
1521         return context.contextSupports(vk::ApiVersion(1, 1, 0));
1522 }
1523
1524 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
1525         Context& context, const VkShaderStageFlags stage)
1526 {
1527         VkPhysicalDeviceSubgroupProperties subgroupProperties;
1528         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1529         subgroupProperties.pNext = DE_NULL;
1530
1531         VkPhysicalDeviceProperties2 properties;
1532         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1533         properties.pNext = &subgroupProperties;
1534
1535         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1536
1537         return (stage & subgroupProperties.supportedStages) ? true : false;
1538 }
1539
1540 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
1541         VkShaderStageFlags stage)
1542 {
1543         switch (stage)
1544         {
1545                 default:
1546                         return false;
1547                 case VK_SHADER_STAGE_COMPUTE_BIT:
1548                         return true;
1549         }
1550 }
1551
1552 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
1553         Context& context,
1554         VkSubgroupFeatureFlagBits bit) {
1555         VkPhysicalDeviceSubgroupProperties subgroupProperties;
1556         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1557         subgroupProperties.pNext = DE_NULL;
1558
1559         VkPhysicalDeviceProperties2 properties;
1560         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1561         properties.pNext = &subgroupProperties;
1562
1563         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1564
1565         return (bit & subgroupProperties.supportedOperations) ? true : false;
1566 }
1567
1568 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
1569 {
1570         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1571                                 context.getInstanceInterface(), context.getPhysicalDevice());
1572         return features.fragmentStoresAndAtomics ? true : false;
1573 }
1574
1575 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
1576 {
1577         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1578                                 context.getInstanceInterface(), context.getPhysicalDevice());
1579         return features.vertexPipelineStoresAndAtomics ? true : false;
1580 }
1581
1582 bool vkt::subgroups::isInt64SupportedForDevice(Context& context)
1583 {
1584         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1585                                 context.getInstanceInterface(), context.getPhysicalDevice());
1586         return features.shaderInt64 ? true : false;
1587 }
1588
1589 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1590 {
1591         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1592                 context.getInstanceInterface(), context.getPhysicalDevice());
1593         return features.shaderTessellationAndGeometryPointSize ? true : false;
1594 }
1595
1596 bool vkt::subgroups::isFormatSupportedForDevice(Context& context, vk::VkFormat format)
1597 {
1598         VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures subgroupExtendedTypesFeatures;
1599         deMemset(&subgroupExtendedTypesFeatures, 0, sizeof(subgroupExtendedTypesFeatures));
1600         subgroupExtendedTypesFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES;
1601         subgroupExtendedTypesFeatures.pNext = DE_NULL;
1602
1603         VkPhysicalDeviceShaderFloat16Int8Features float16Int8Features;
1604         deMemset(&float16Int8Features, 0, sizeof(float16Int8Features));
1605         float16Int8Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
1606         float16Int8Features.pNext = DE_NULL;
1607
1608         VkPhysicalDeviceFeatures2 features2;
1609         deMemset(&features2, 0, sizeof(features2));
1610         features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1611         features2.pNext = DE_NULL;
1612
1613         VkPhysicalDevice16BitStorageFeatures storage16bit;
1614         deMemset(&storage16bit, 0, sizeof(storage16bit));
1615         storage16bit.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
1616         storage16bit.pNext = DE_NULL;
1617         bool is16bitStorageSupported = context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage");
1618
1619         if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1620                 context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1621         {
1622                 features2.pNext = &subgroupExtendedTypesFeatures;
1623                 subgroupExtendedTypesFeatures.pNext = &float16Int8Features;
1624                 if ( is16bitStorageSupported )
1625                 {
1626                         float16Int8Features.pNext = &storage16bit;
1627                 }
1628
1629         }
1630
1631         const PlatformInterface&                platformInterface               = context.getPlatformInterface();
1632         const VkInstance                                instance                                = context.getInstance();
1633         const InstanceDriver                    instanceDriver                  (platformInterface, instance);
1634
1635         instanceDriver.getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
1636
1637         switch (format)
1638         {
1639                 default:
1640                         return true;
1641                 case VK_FORMAT_R16_SFLOAT:
1642                 case VK_FORMAT_R16G16_SFLOAT:
1643                 case VK_FORMAT_R16G16B16_SFLOAT:
1644                 case VK_FORMAT_R16G16B16A16_SFLOAT:
1645                         return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderFloat16 & storage16bit.storageBuffer16BitAccess ? true : false;
1646                 case VK_FORMAT_R64_SFLOAT:
1647                 case VK_FORMAT_R64G64_SFLOAT:
1648                 case VK_FORMAT_R64G64B64_SFLOAT:
1649                 case VK_FORMAT_R64G64B64A64_SFLOAT:
1650                         return features2.features.shaderFloat64 ? true : false;
1651                 case VK_FORMAT_R8_SINT:
1652                 case VK_FORMAT_R8G8_SINT:
1653                 case VK_FORMAT_R8G8B8_SINT:
1654                 case VK_FORMAT_R8G8B8A8_SINT:
1655                 case VK_FORMAT_R8_UINT:
1656                 case VK_FORMAT_R8G8_UINT:
1657                 case VK_FORMAT_R8G8B8_UINT:
1658                 case VK_FORMAT_R8G8B8A8_UINT:
1659                         return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderInt8 ? true : false;
1660                 case VK_FORMAT_R16_SINT:
1661                 case VK_FORMAT_R16G16_SINT:
1662                 case VK_FORMAT_R16G16B16_SINT:
1663                 case VK_FORMAT_R16G16B16A16_SINT:
1664                 case VK_FORMAT_R16_UINT:
1665                 case VK_FORMAT_R16G16_UINT:
1666                 case VK_FORMAT_R16G16B16_UINT:
1667                 case VK_FORMAT_R16G16B16A16_UINT:
1668                         return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt16 & storage16bit.storageBuffer16BitAccess ? true : false;
1669                 case VK_FORMAT_R64_SINT:
1670                 case VK_FORMAT_R64G64_SINT:
1671                 case VK_FORMAT_R64G64B64_SINT:
1672                 case VK_FORMAT_R64G64B64A64_SINT:
1673                 case VK_FORMAT_R64_UINT:
1674                 case VK_FORMAT_R64G64_UINT:
1675                 case VK_FORMAT_R64G64B64_UINT:
1676                 case VK_FORMAT_R64G64B64A64_UINT:
1677                         return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt64 ? true : false;
1678         }
1679 }
1680
1681 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1682 {
1683         return context.contextSupports(vk::ApiVersion(1, 2, 0)) &&
1684                 vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice()).subgroupBroadcastDynamicId;
1685 }
1686
1687 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1688 {
1689         switch (format)
1690         {
1691                 default:
1692                         DE_FATAL("Unhandled format!");
1693                         return "";
1694                 case VK_FORMAT_R8_SINT:
1695                         return "int8_t";
1696                 case VK_FORMAT_R8G8_SINT:
1697                         return "i8vec2";
1698                 case VK_FORMAT_R8G8B8_SINT:
1699                         return "i8vec3";
1700                 case VK_FORMAT_R8G8B8A8_SINT:
1701                         return "i8vec4";
1702                 case VK_FORMAT_R8_UINT:
1703                         return "uint8_t";
1704                 case VK_FORMAT_R8G8_UINT:
1705                         return "u8vec2";
1706                 case VK_FORMAT_R8G8B8_UINT:
1707                         return "u8vec3";
1708                 case VK_FORMAT_R8G8B8A8_UINT:
1709                         return "u8vec4";
1710                 case VK_FORMAT_R16_SINT:
1711                         return "int16_t";
1712                 case VK_FORMAT_R16G16_SINT:
1713                         return "i16vec2";
1714                 case VK_FORMAT_R16G16B16_SINT:
1715                         return "i16vec3";
1716                 case VK_FORMAT_R16G16B16A16_SINT:
1717                         return "i16vec4";
1718                 case VK_FORMAT_R16_UINT:
1719                         return "uint16_t";
1720                 case VK_FORMAT_R16G16_UINT:
1721                         return "u16vec2";
1722                 case VK_FORMAT_R16G16B16_UINT:
1723                         return "u16vec3";
1724                 case VK_FORMAT_R16G16B16A16_UINT:
1725                         return "u16vec4";
1726                 case VK_FORMAT_R32_SINT:
1727                         return "int";
1728                 case VK_FORMAT_R32G32_SINT:
1729                         return "ivec2";
1730                 case VK_FORMAT_R32G32B32_SINT:
1731                         return "ivec3";
1732                 case VK_FORMAT_R32G32B32A32_SINT:
1733                         return "ivec4";
1734                 case VK_FORMAT_R32_UINT:
1735                         return "uint";
1736                 case VK_FORMAT_R32G32_UINT:
1737                         return "uvec2";
1738                 case VK_FORMAT_R32G32B32_UINT:
1739                         return "uvec3";
1740                 case VK_FORMAT_R32G32B32A32_UINT:
1741                         return "uvec4";
1742                 case VK_FORMAT_R64_SINT:
1743                         return "int64_t";
1744                 case VK_FORMAT_R64G64_SINT:
1745                         return "i64vec2";
1746                 case VK_FORMAT_R64G64B64_SINT:
1747                         return "i64vec3";
1748                 case VK_FORMAT_R64G64B64A64_SINT:
1749                         return "i64vec4";
1750                 case VK_FORMAT_R64_UINT:
1751                         return "uint64_t";
1752                 case VK_FORMAT_R64G64_UINT:
1753                         return "u64vec2";
1754                 case VK_FORMAT_R64G64B64_UINT:
1755                         return "u64vec3";
1756                 case VK_FORMAT_R64G64B64A64_UINT:
1757                         return "u64vec4";
1758                 case VK_FORMAT_R16_SFLOAT:
1759                         return "float16_t";
1760                 case VK_FORMAT_R16G16_SFLOAT:
1761                         return "f16vec2";
1762                 case VK_FORMAT_R16G16B16_SFLOAT:
1763                         return "f16vec3";
1764                 case VK_FORMAT_R16G16B16A16_SFLOAT:
1765                         return "f16vec4";
1766                 case VK_FORMAT_R32_SFLOAT:
1767                         return "float";
1768                 case VK_FORMAT_R32G32_SFLOAT:
1769                         return "vec2";
1770                 case VK_FORMAT_R32G32B32_SFLOAT:
1771                         return "vec3";
1772                 case VK_FORMAT_R32G32B32A32_SFLOAT:
1773                         return "vec4";
1774                 case VK_FORMAT_R64_SFLOAT:
1775                         return "double";
1776                 case VK_FORMAT_R64G64_SFLOAT:
1777                         return "dvec2";
1778                 case VK_FORMAT_R64G64B64_SFLOAT:
1779                         return "dvec3";
1780                 case VK_FORMAT_R64G64B64A64_SFLOAT:
1781                         return "dvec4";
1782                 case VK_FORMAT_R8_USCALED:
1783                         return "bool";
1784                 case VK_FORMAT_R8G8_USCALED:
1785                         return "bvec2";
1786                 case VK_FORMAT_R8G8B8_USCALED:
1787                         return "bvec3";
1788                 case VK_FORMAT_R8G8B8A8_USCALED:
1789                         return "bvec4";
1790         }
1791 }
1792
1793 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1794 {
1795         switch (format)
1796         {
1797                 default:
1798                         return "";
1799                 case VK_FORMAT_R8_SINT:
1800                 case VK_FORMAT_R8G8_SINT:
1801                 case VK_FORMAT_R8G8B8_SINT:
1802                 case VK_FORMAT_R8G8B8A8_SINT:
1803                 case VK_FORMAT_R8_UINT:
1804                 case VK_FORMAT_R8G8_UINT:
1805                 case VK_FORMAT_R8G8B8_UINT:
1806                 case VK_FORMAT_R8G8B8A8_UINT:
1807                         return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1808                 case VK_FORMAT_R16_SINT:
1809                 case VK_FORMAT_R16G16_SINT:
1810                 case VK_FORMAT_R16G16B16_SINT:
1811                 case VK_FORMAT_R16G16B16A16_SINT:
1812                 case VK_FORMAT_R16_UINT:
1813                 case VK_FORMAT_R16G16_UINT:
1814                 case VK_FORMAT_R16G16B16_UINT:
1815                 case VK_FORMAT_R16G16B16A16_UINT:
1816                         return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1817                 case VK_FORMAT_R64_SINT:
1818                 case VK_FORMAT_R64G64_SINT:
1819                 case VK_FORMAT_R64G64B64_SINT:
1820                 case VK_FORMAT_R64G64B64A64_SINT:
1821                 case VK_FORMAT_R64_UINT:
1822                 case VK_FORMAT_R64G64_UINT:
1823                 case VK_FORMAT_R64G64B64_UINT:
1824                 case VK_FORMAT_R64G64B64A64_UINT:
1825                         return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1826                 case VK_FORMAT_R16_SFLOAT:
1827                 case VK_FORMAT_R16G16_SFLOAT:
1828                 case VK_FORMAT_R16G16B16_SFLOAT:
1829                 case VK_FORMAT_R16G16B16A16_SFLOAT:
1830                         return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1831         }
1832 }
1833
1834 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats()
1835 {
1836         std::vector<VkFormat> formats;
1837
1838         formats.push_back(VK_FORMAT_R8_SINT);
1839         formats.push_back(VK_FORMAT_R8G8_SINT);
1840         formats.push_back(VK_FORMAT_R8G8B8_SINT);
1841         formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1842         formats.push_back(VK_FORMAT_R8_UINT);
1843         formats.push_back(VK_FORMAT_R8G8_UINT);
1844         formats.push_back(VK_FORMAT_R8G8B8_UINT);
1845         formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1846         formats.push_back(VK_FORMAT_R16_SINT);
1847         formats.push_back(VK_FORMAT_R16G16_SINT);
1848         formats.push_back(VK_FORMAT_R16G16B16_SINT);
1849         formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1850         formats.push_back(VK_FORMAT_R16_UINT);
1851         formats.push_back(VK_FORMAT_R16G16_UINT);
1852         formats.push_back(VK_FORMAT_R16G16B16_UINT);
1853         formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1854         formats.push_back(VK_FORMAT_R32_SINT);
1855         formats.push_back(VK_FORMAT_R32G32_SINT);
1856         formats.push_back(VK_FORMAT_R32G32B32_SINT);
1857         formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
1858         formats.push_back(VK_FORMAT_R32_UINT);
1859         formats.push_back(VK_FORMAT_R32G32_UINT);
1860         formats.push_back(VK_FORMAT_R32G32B32_UINT);
1861         formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
1862         formats.push_back(VK_FORMAT_R64_SINT);
1863         formats.push_back(VK_FORMAT_R64G64_SINT);
1864         formats.push_back(VK_FORMAT_R64G64B64_SINT);
1865         formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
1866         formats.push_back(VK_FORMAT_R64_UINT);
1867         formats.push_back(VK_FORMAT_R64G64_UINT);
1868         formats.push_back(VK_FORMAT_R64G64B64_UINT);
1869         formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
1870         formats.push_back(VK_FORMAT_R16_SFLOAT);
1871         formats.push_back(VK_FORMAT_R16G16_SFLOAT);
1872         formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
1873         formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
1874         formats.push_back(VK_FORMAT_R32_SFLOAT);
1875         formats.push_back(VK_FORMAT_R32G32_SFLOAT);
1876         formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
1877         formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
1878         formats.push_back(VK_FORMAT_R64_SFLOAT);
1879         formats.push_back(VK_FORMAT_R64G64_SFLOAT);
1880         formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
1881         formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
1882         formats.push_back(VK_FORMAT_R8_USCALED);
1883         formats.push_back(VK_FORMAT_R8G8_USCALED);
1884         formats.push_back(VK_FORMAT_R8G8B8_USCALED);
1885         formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
1886
1887         return formats;
1888 }
1889
1890 bool vkt::subgroups::isFormatSigned (VkFormat format)
1891 {
1892         switch (format)
1893         {
1894                 default:
1895                         return false;
1896                 case VK_FORMAT_R8_SINT:
1897                 case VK_FORMAT_R8G8_SINT:
1898                 case VK_FORMAT_R8G8B8_SINT:
1899                 case VK_FORMAT_R8G8B8A8_SINT:
1900                 case VK_FORMAT_R16_SINT:
1901                 case VK_FORMAT_R16G16_SINT:
1902                 case VK_FORMAT_R16G16B16_SINT:
1903                 case VK_FORMAT_R16G16B16A16_SINT:
1904                 case VK_FORMAT_R32_SINT:
1905                 case VK_FORMAT_R32G32_SINT:
1906                 case VK_FORMAT_R32G32B32_SINT:
1907                 case VK_FORMAT_R32G32B32A32_SINT:
1908                 case VK_FORMAT_R64_SINT:
1909                 case VK_FORMAT_R64G64_SINT:
1910                 case VK_FORMAT_R64G64B64_SINT:
1911                 case VK_FORMAT_R64G64B64A64_SINT:
1912                         return true;
1913         }
1914 }
1915
1916 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
1917 {
1918         switch (format)
1919         {
1920                 default:
1921                         return false;
1922                 case VK_FORMAT_R8_UINT:
1923                 case VK_FORMAT_R8G8_UINT:
1924                 case VK_FORMAT_R8G8B8_UINT:
1925                 case VK_FORMAT_R8G8B8A8_UINT:
1926                 case VK_FORMAT_R16_UINT:
1927                 case VK_FORMAT_R16G16_UINT:
1928                 case VK_FORMAT_R16G16B16_UINT:
1929                 case VK_FORMAT_R16G16B16A16_UINT:
1930                 case VK_FORMAT_R32_UINT:
1931                 case VK_FORMAT_R32G32_UINT:
1932                 case VK_FORMAT_R32G32B32_UINT:
1933                 case VK_FORMAT_R32G32B32A32_UINT:
1934                 case VK_FORMAT_R64_UINT:
1935                 case VK_FORMAT_R64G64_UINT:
1936                 case VK_FORMAT_R64G64B64_UINT:
1937                 case VK_FORMAT_R64G64B64A64_UINT:
1938                         return true;
1939         }
1940 }
1941
1942 bool vkt::subgroups::isFormatFloat (VkFormat format)
1943 {
1944         switch (format)
1945         {
1946                 default:
1947                         return false;
1948                 case VK_FORMAT_R16_SFLOAT:
1949                 case VK_FORMAT_R16G16_SFLOAT:
1950                 case VK_FORMAT_R16G16B16_SFLOAT:
1951                 case VK_FORMAT_R16G16B16A16_SFLOAT:
1952                 case VK_FORMAT_R32_SFLOAT:
1953                 case VK_FORMAT_R32G32_SFLOAT:
1954                 case VK_FORMAT_R32G32B32_SFLOAT:
1955                 case VK_FORMAT_R32G32B32A32_SFLOAT:
1956                 case VK_FORMAT_R64_SFLOAT:
1957                 case VK_FORMAT_R64G64_SFLOAT:
1958                 case VK_FORMAT_R64G64B64_SFLOAT:
1959                 case VK_FORMAT_R64G64B64A64_SFLOAT:
1960                         return true;
1961         }
1962 }
1963
1964 bool vkt::subgroups::isFormatBool (VkFormat format)
1965 {
1966         switch (format)
1967         {
1968                 default:
1969                         return false;
1970                 case VK_FORMAT_R8_USCALED:
1971                 case VK_FORMAT_R8G8_USCALED:
1972                 case VK_FORMAT_R8G8B8_USCALED:
1973                 case VK_FORMAT_R8G8B8A8_USCALED:
1974                         return true;
1975         }
1976 }
1977
1978 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
1979 {
1980         /*
1981                 "layout(location = 0) in highp vec4 in_position;\n"
1982                 "void main (void)\n"
1983                 "{\n"
1984                 "  gl_Position = in_position;\n"
1985                 "  gl_PointSize = 1.0f;\n"
1986                 "}\n";
1987         */
1988         programCollection.spirvAsmSources.add("vert") <<
1989                 "; SPIR-V\n"
1990                 "; Version: 1.3\n"
1991                 "; Generator: Khronos Glslang Reference Front End; 7\n"
1992                 "; Bound: 25\n"
1993                 "; Schema: 0\n"
1994                 "OpCapability Shader\n"
1995                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1996                 "OpMemoryModel Logical GLSL450\n"
1997                 "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
1998                 "OpMemberDecorate %11 0 BuiltIn Position\n"
1999                 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
2000                 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2001                 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2002                 "OpDecorate %11 Block\n"
2003                 "OpDecorate %17 Location 0\n"
2004                 "%2 = OpTypeVoid\n"
2005                 "%3 = OpTypeFunction %2\n"
2006                 "%6 = OpTypeFloat 32\n"
2007                 "%7 = OpTypeVector %6 4\n"
2008                 "%8 = OpTypeInt 32 0\n"
2009                 "%9 = OpConstant %8 1\n"
2010                 "%10 = OpTypeArray %6 %9\n"
2011                 "%11 = OpTypeStruct %7 %6 %10 %10\n"
2012                 "%12 = OpTypePointer Output %11\n"
2013                 "%13 = OpVariable %12 Output\n"
2014                 "%14 = OpTypeInt 32 1\n"
2015                 "%15 = OpConstant %14 0\n"
2016                 "%16 = OpTypePointer Input %7\n"
2017                 "%17 = OpVariable %16 Input\n"
2018                 "%19 = OpTypePointer Output %7\n"
2019                 "%21 = OpConstant %14 1\n"
2020                 "%22 = OpConstant %6 1\n"
2021                 "%23 = OpTypePointer Output %6\n"
2022                 "%4 = OpFunction %2 None %3\n"
2023                 "%5 = OpLabel\n"
2024                 "%18 = OpLoad %7 %17\n"
2025                 "%20 = OpAccessChain %19 %13 %15\n"
2026                 "OpStore %20 %18\n"
2027                 "%24 = OpAccessChain %23 %13 %21\n"
2028                 "OpStore %24 %22\n"
2029                 "OpReturn\n"
2030                 "OpFunctionEnd\n";
2031 }
2032
2033 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
2034 {
2035         /*
2036                 "layout(location = 0) in float in_color;\n"
2037                 "layout(location = 0) out uint out_color;\n"
2038                 "void main()\n"
2039                 {\n"
2040                 "       out_color = uint(in_color);\n"
2041                 "}\n";
2042         */
2043         programCollection.spirvAsmSources.add("fragment") <<
2044                 "; SPIR-V\n"
2045                 "; Version: 1.3\n"
2046                 "; Generator: Khronos Glslang Reference Front End; 2\n"
2047                 "; Bound: 14\n"
2048                 "; Schema: 0\n"
2049                 "OpCapability Shader\n"
2050                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2051                 "OpMemoryModel Logical GLSL450\n"
2052                 "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
2053                 "OpExecutionMode %4 OriginUpperLeft\n"
2054                 "OpDecorate %8 Location 0\n"
2055                 "OpDecorate %11 Location 0\n"
2056                 "%2 = OpTypeVoid\n"
2057                 "%3 = OpTypeFunction %2\n"
2058                 "%6 = OpTypeInt 32 0\n"
2059                 "%7 = OpTypePointer Output %6\n"
2060                 "%8 = OpVariable %7 Output\n"
2061                 "%9 = OpTypeFloat 32\n"
2062                 "%10 = OpTypePointer Input %9\n"
2063                 "%11 = OpVariable %10 Input\n"
2064                 "%4 = OpFunction %2 None %3\n"
2065                 "%5 = OpLabel\n"
2066                 "%12 = OpLoad %9 %11\n"
2067                 "%13 = OpConvertFToU %6 %12\n"
2068                 "OpStore %8 %13\n"
2069                 "OpReturn\n"
2070                 "OpFunctionEnd\n";
2071 }
2072
2073 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
2074 {
2075         /*
2076                 "#extension GL_KHR_shader_subgroup_basic: enable\n"
2077                 "#extension GL_EXT_tessellation_shader : require\n"
2078                 "layout(vertices = 2) out;\n"
2079                 "void main (void)\n"
2080                 "{\n"
2081                 "  if (gl_InvocationID == 0)\n"
2082                 "  {\n"
2083                 "    gl_TessLevelOuter[0] = 1.0f;\n"
2084                 "    gl_TessLevelOuter[1] = 1.0f;\n"
2085                 "  }\n"
2086                 "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
2087                 "}\n";
2088         */
2089         programCollection.spirvAsmSources.add("tesc") <<
2090                 "; SPIR-V\n"
2091                 "; Version: 1.3\n"
2092                 "; Generator: Khronos Glslang Reference Front End; 2\n"
2093                 "; Bound: 46\n"
2094                 "; Schema: 0\n"
2095                 "OpCapability Tessellation\n"
2096                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2097                 "OpMemoryModel Logical GLSL450\n"
2098                 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
2099                 "OpExecutionMode %4 OutputVertices 2\n"
2100                 "OpDecorate %8 BuiltIn InvocationId\n"
2101                 "OpDecorate %20 Patch\n"
2102                 "OpDecorate %20 BuiltIn TessLevelOuter\n"
2103                 "OpMemberDecorate %29 0 BuiltIn Position\n"
2104                 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
2105                 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
2106                 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
2107                 "OpDecorate %29 Block\n"
2108                 "OpMemberDecorate %35 0 BuiltIn Position\n"
2109                 "OpMemberDecorate %35 1 BuiltIn PointSize\n"
2110                 "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
2111                 "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
2112                 "OpDecorate %35 Block\n"
2113                 "%2 = OpTypeVoid\n"
2114                 "%3 = OpTypeFunction %2\n"
2115                 "%6 = OpTypeInt 32 1\n"
2116                 "%7 = OpTypePointer Input %6\n"
2117                 "%8 = OpVariable %7 Input\n"
2118                 "%10 = OpConstant %6 0\n"
2119                 "%11 = OpTypeBool\n"
2120                 "%15 = OpTypeFloat 32\n"
2121                 "%16 = OpTypeInt 32 0\n"
2122                 "%17 = OpConstant %16 4\n"
2123                 "%18 = OpTypeArray %15 %17\n"
2124                 "%19 = OpTypePointer Output %18\n"
2125                 "%20 = OpVariable %19 Output\n"
2126                 "%21 = OpConstant %15 1\n"
2127                 "%22 = OpTypePointer Output %15\n"
2128                 "%24 = OpConstant %6 1\n"
2129                 "%26 = OpTypeVector %15 4\n"
2130                 "%27 = OpConstant %16 1\n"
2131                 "%28 = OpTypeArray %15 %27\n"
2132                 "%29 = OpTypeStruct %26 %15 %28 %28\n"
2133                 "%30 = OpConstant %16 2\n"
2134                 "%31 = OpTypeArray %29 %30\n"
2135                 "%32 = OpTypePointer Output %31\n"
2136                 "%33 = OpVariable %32 Output\n"
2137                 "%35 = OpTypeStruct %26 %15 %28 %28\n"
2138                 "%36 = OpConstant %16 32\n"
2139                 "%37 = OpTypeArray %35 %36\n"
2140                 "%38 = OpTypePointer Input %37\n"
2141                 "%39 = OpVariable %38 Input\n"
2142                 "%41 = OpTypePointer Input %26\n"
2143                 "%44 = OpTypePointer Output %26\n"
2144                 "%4 = OpFunction %2 None %3\n"
2145                 "%5 = OpLabel\n"
2146                 "%9 = OpLoad %6 %8\n"
2147                 "%12 = OpIEqual %11 %9 %10\n"
2148                 "OpSelectionMerge %14 None\n"
2149                 "OpBranchConditional %12 %13 %14\n"
2150                 "%13 = OpLabel\n"
2151                 "%23 = OpAccessChain %22 %20 %10\n"
2152                 "OpStore %23 %21\n"
2153                 "%25 = OpAccessChain %22 %20 %24\n"
2154                 "OpStore %25 %21\n"
2155                 "OpBranch %14\n"
2156                 "%14 = OpLabel\n"
2157                 "%34 = OpLoad %6 %8\n"
2158                 "%40 = OpLoad %6 %8\n"
2159                 "%42 = OpAccessChain %41 %39 %40 %10\n"
2160                 "%43 = OpLoad %26 %42\n"
2161                 "%45 = OpAccessChain %44 %33 %34 %10\n"
2162                 "OpStore %45 %43\n"
2163                 "OpReturn\n"
2164                 "OpFunctionEnd\n";
2165 }
2166
2167 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
2168 {
2169         /*
2170                 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
2171                 "#extension GL_EXT_tessellation_shader : require\n"
2172                 "layout(isolines, equal_spacing, ccw ) in;\n"
2173                 "layout(location = 0) in float in_color[];\n"
2174                 "layout(location = 0) out float out_color;\n"
2175                 "\n"
2176                 "void main (void)\n"
2177                 "{\n"
2178                 "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
2179                 "  out_color = in_color[0];\n"
2180                 "}\n";
2181         */
2182         programCollection.spirvAsmSources.add("tese") <<
2183                 "; SPIR-V\n"
2184                 "; Version: 1.3\n"
2185                 "; Generator: Khronos Glslang Reference Front End; 2\n"
2186                 "; Bound: 45\n"
2187                 "; Schema: 0\n"
2188                 "OpCapability Tessellation\n"
2189                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2190                 "OpMemoryModel Logical GLSL450\n"
2191                 "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
2192                 "OpExecutionMode %4 Isolines\n"
2193                 "OpExecutionMode %4 SpacingEqual\n"
2194                 "OpExecutionMode %4 VertexOrderCcw\n"
2195                 "OpMemberDecorate %11 0 BuiltIn Position\n"
2196                 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
2197                 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2198                 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2199                 "OpDecorate %11 Block\n"
2200                 "OpMemberDecorate %16 0 BuiltIn Position\n"
2201                 "OpMemberDecorate %16 1 BuiltIn PointSize\n"
2202                 "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
2203                 "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
2204                 "OpDecorate %16 Block\n"
2205                 "OpDecorate %29 BuiltIn TessCoord\n"
2206                 "OpDecorate %39 Location 0\n"
2207                 "OpDecorate %42 Location 0\n"
2208                 "%2 = OpTypeVoid\n"
2209                 "%3 = OpTypeFunction %2\n"
2210                 "%6 = OpTypeFloat 32\n"
2211                 "%7 = OpTypeVector %6 4\n"
2212                 "%8 = OpTypeInt 32 0\n"
2213                 "%9 = OpConstant %8 1\n"
2214                 "%10 = OpTypeArray %6 %9\n"
2215                 "%11 = OpTypeStruct %7 %6 %10 %10\n"
2216                 "%12 = OpTypePointer Output %11\n"
2217                 "%13 = OpVariable %12 Output\n"
2218                 "%14 = OpTypeInt 32 1\n"
2219                 "%15 = OpConstant %14 0\n"
2220                 "%16 = OpTypeStruct %7 %6 %10 %10\n"
2221                 "%17 = OpConstant %8 32\n"
2222                 "%18 = OpTypeArray %16 %17\n"
2223                 "%19 = OpTypePointer Input %18\n"
2224                 "%20 = OpVariable %19 Input\n"
2225                 "%21 = OpTypePointer Input %7\n"
2226                 "%24 = OpConstant %14 1\n"
2227                 "%27 = OpTypeVector %6 3\n"
2228                 "%28 = OpTypePointer Input %27\n"
2229                 "%29 = OpVariable %28 Input\n"
2230                 "%30 = OpConstant %8 0\n"
2231                 "%31 = OpTypePointer Input %6\n"
2232                 "%36 = OpTypePointer Output %7\n"
2233                 "%38 = OpTypePointer Output %6\n"
2234                 "%39 = OpVariable %38 Output\n"
2235                 "%40 = OpTypeArray %6 %17\n"
2236                 "%41 = OpTypePointer Input %40\n"
2237                 "%42 = OpVariable %41 Input\n"
2238                 "%4 = OpFunction %2 None %3\n"
2239                 "%5 = OpLabel\n"
2240                 "%22 = OpAccessChain %21 %20 %15 %15\n"
2241                 "%23 = OpLoad %7 %22\n"
2242                 "%25 = OpAccessChain %21 %20 %24 %15\n"
2243                 "%26 = OpLoad %7 %25\n"
2244                 "%32 = OpAccessChain %31 %29 %30\n"
2245                 "%33 = OpLoad %6 %32\n"
2246                 "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
2247                 "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
2248                 "%37 = OpAccessChain %36 %13 %15\n"
2249                 "OpStore %37 %35\n"
2250                 "%43 = OpAccessChain %31 %42 %15\n"
2251                 "%44 = OpLoad %6 %43\n"
2252                 "OpStore %39 %44\n"
2253                 "OpReturn\n"
2254                 "OpFunctionEnd\n";
2255 }
2256
2257 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options,  vk::GlslSourceCollection& collection)
2258 {
2259         tcu::StringTemplate geometryTemplate(glslTemplate);
2260
2261         map<string, string>             linesParams;
2262         linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2263
2264         map<string, string>             pointsParams;
2265         pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2266
2267         collection.add("geometry_lines")        << glu::GeometrySource(geometryTemplate.specialize(linesParams))        << options;
2268         collection.add("geometry_points")       << glu::GeometrySource(geometryTemplate.specialize(pointsParams))       << options;
2269 }
2270
2271 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
2272 {
2273         tcu::StringTemplate geometryTemplate(spirvTemplate);
2274
2275         map<string, string>             linesParams;
2276         linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2277
2278         map<string, string>             pointsParams;
2279         pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2280
2281         collection.add("geometry_lines")        << geometryTemplate.specialize(linesParams)             << options;
2282         collection.add("geometry_points")       << geometryTemplate.specialize(pointsParams)    << options;
2283 }
2284
2285 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
2286 {
2287         const vk::VkFormat format = data.format;
2288         const vk::VkDeviceSize size = data.numElements *
2289                 (data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2290         if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
2291         {
2292                 de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2293
2294                 switch (format)
2295                 {
2296                         default:
2297                                 DE_FATAL("Illegal buffer format");
2298                                 break;
2299                         case VK_FORMAT_R8_SINT:
2300                         case VK_FORMAT_R8G8_SINT:
2301                         case VK_FORMAT_R8G8B8_SINT:
2302                         case VK_FORMAT_R8G8B8A8_SINT:
2303                         case VK_FORMAT_R8_UINT:
2304                         case VK_FORMAT_R8G8_UINT:
2305                         case VK_FORMAT_R8G8B8_UINT:
2306                         case VK_FORMAT_R8G8B8A8_UINT:
2307                         {
2308                                 deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
2309
2310                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
2311                                 {
2312                                         ptr[k] = rnd.getUint8();
2313                                 }
2314                         }
2315                         break;
2316                         case VK_FORMAT_R16_SINT:
2317                         case VK_FORMAT_R16G16_SINT:
2318                         case VK_FORMAT_R16G16B16_SINT:
2319                         case VK_FORMAT_R16G16B16A16_SINT:
2320                         case VK_FORMAT_R16_UINT:
2321                         case VK_FORMAT_R16G16_UINT:
2322                         case VK_FORMAT_R16G16B16_UINT:
2323                         case VK_FORMAT_R16G16B16A16_UINT:
2324                         {
2325                                 deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
2326
2327                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
2328                                 {
2329                                         ptr[k] = rnd.getUint16();
2330                                 }
2331                         }
2332                         break;
2333                         case VK_FORMAT_R8_USCALED:
2334                         case VK_FORMAT_R8G8_USCALED:
2335                         case VK_FORMAT_R8G8B8_USCALED:
2336                         case VK_FORMAT_R8G8B8A8_USCALED:
2337                         {
2338                                 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2339
2340                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2341                                 {
2342                                         deUint32 r = rnd.getUint32();
2343                                         ptr[k] = (r & 1) ? r : 0;
2344                                 }
2345                         }
2346                         break;
2347                         case VK_FORMAT_R32_SINT:
2348                         case VK_FORMAT_R32G32_SINT:
2349                         case VK_FORMAT_R32G32B32_SINT:
2350                         case VK_FORMAT_R32G32B32A32_SINT:
2351                         case VK_FORMAT_R32_UINT:
2352                         case VK_FORMAT_R32G32_UINT:
2353                         case VK_FORMAT_R32G32B32_UINT:
2354                         case VK_FORMAT_R32G32B32A32_UINT:
2355                         {
2356                                 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2357
2358                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2359                                 {
2360                                         ptr[k] = rnd.getUint32();
2361                                 }
2362                         }
2363                         break;
2364                         case VK_FORMAT_R64_SINT:
2365                         case VK_FORMAT_R64G64_SINT:
2366                         case VK_FORMAT_R64G64B64_SINT:
2367                         case VK_FORMAT_R64G64B64A64_SINT:
2368                         case VK_FORMAT_R64_UINT:
2369                         case VK_FORMAT_R64G64_UINT:
2370                         case VK_FORMAT_R64G64B64_UINT:
2371                         case VK_FORMAT_R64G64B64A64_UINT:
2372                         {
2373                                 deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
2374
2375                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
2376                                 {
2377                                         ptr[k] = rnd.getUint64();
2378                                 }
2379                         }
2380                         break;
2381                         case VK_FORMAT_R16_SFLOAT:
2382                         case VK_FORMAT_R16G16_SFLOAT:
2383                         case VK_FORMAT_R16G16B16_SFLOAT:
2384                         case VK_FORMAT_R16G16B16A16_SFLOAT:
2385                         {
2386                                 deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
2387
2388                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
2389                                 {
2390                                         ptr[k] = deFloat32To16(rnd.getFloat());
2391                                 }
2392                         }
2393                         break;
2394                         case VK_FORMAT_R32_SFLOAT:
2395                         case VK_FORMAT_R32G32_SFLOAT:
2396                         case VK_FORMAT_R32G32B32_SFLOAT:
2397                         case VK_FORMAT_R32G32B32A32_SFLOAT:
2398                         {
2399                                 float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
2400
2401                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2402                                 {
2403                                         ptr[k] = rnd.getFloat();
2404                                 }
2405                         }
2406                         break;
2407                         case VK_FORMAT_R64_SFLOAT:
2408                         case VK_FORMAT_R64G64_SFLOAT:
2409                         case VK_FORMAT_R64G64B64_SFLOAT:
2410                         case VK_FORMAT_R64G64B64A64_SFLOAT:
2411                         {
2412                                 double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
2413
2414                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2415                                 {
2416                                         ptr[k] = rnd.getDouble();
2417                                 }
2418                         }
2419                         break;
2420                 }
2421         }
2422         else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2423         {
2424                 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2425
2426                 for (vk::VkDeviceSize k = 0; k < size / 4; k++)
2427                 {
2428                         ptr[k] = 0;
2429                 }
2430         }
2431
2432         if (subgroups::SSBOData::InitializeNone != data.initializeType)
2433         {
2434                 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2435         }
2436 }
2437
2438 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2439 {
2440         switch(shaderStage)
2441         {
2442                 case VK_SHADER_STAGE_VERTEX_BIT:
2443                         return 0u;
2444                         break;
2445                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2446                         return 1u;
2447                         break;
2448                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2449                         return 2u;
2450                         break;
2451                 case VK_SHADER_STAGE_GEOMETRY_BIT:
2452                         return 3u;
2453                         break;
2454                 default:
2455                         DE_ASSERT(0);
2456                         return -1;
2457         }
2458         DE_ASSERT(0);
2459         return -1;
2460 }
2461
2462 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
2463         Context& context, VkFormat format, SSBOData* extraData,
2464         deUint32 extraDataCount, const void* internalData,
2465         bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2466         const VkShaderStageFlags shaderStage)
2467 {
2468         return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2469 }
2470
2471 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
2472         Context& context, VkFormat format, SSBOData* extraData,
2473         deUint32 extraDataCount, const void* internalData,
2474         bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2475         const VkShaderStageFlags shaderStage, const deUint32 tessShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
2476 {
2477         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
2478         const VkDevice                                                  device                                  = context.getDevice();
2479         const deUint32                                                  maxWidth                                = getMaxWidth();
2480         vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
2481         DescriptorSetLayoutBuilder                              layoutBuilder;
2482         DescriptorPoolBuilder                                   poolBuilder;
2483         DescriptorSetUpdateBuilder                              updateBuilder;
2484         Move <VkDescriptorPool>                                 descriptorPool;
2485         Move <VkDescriptorSet>                                  descriptorSet;
2486
2487         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(vk, device,
2488                                                                                                                                                 context.getBinaryCollection().get("vert"), 0u));
2489         const Unique<VkShaderModule>                    teCtrlShaderModule              (createShaderModule(vk, device,
2490                                                                                                                                                 context.getBinaryCollection().get("tesc"), 0u));
2491         const Unique<VkShaderModule>                    teEvalShaderModule              (createShaderModule(vk, device,
2492                                                                                                                                                 context.getBinaryCollection().get("tese"), 0u));
2493         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(vk, device,
2494                                                                                                                                         context.getBinaryCollection().get("fragment"), 0u));
2495         const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
2496
2497         const VkVertexInputBindingDescription   vertexInputBinding              =
2498         {
2499                 0u,                                                                                     // binding;
2500                 static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
2501                 VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
2502         };
2503
2504         const VkVertexInputAttributeDescription vertexInputAttribute    =
2505         {
2506                 0u,
2507                 0u,
2508                 VK_FORMAT_R32G32B32A32_SFLOAT,
2509                 0u
2510         };
2511
2512         for (deUint32 i = 0u; i < extraDataCount; i++)
2513         {
2514                 if (extraData[i].isImage)
2515                 {
2516                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2517                 }
2518                 else
2519                 {
2520                         vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2521                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2522                 }
2523                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2524                 initializeMemory(context, alloc, extraData[i]);
2525         }
2526
2527         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2528                 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
2529
2530         const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(vk, device));
2531
2532         const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(vk, device, *descriptorSetLayout));
2533
2534         const deUint32 requiredSubgroupSizes[5] = {0u,
2535                                                                                            ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
2536                                                                                            ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
2537                                                                                            0u,
2538                                                                                            0u};
2539
2540         const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
2541                                                                                                                                                                                   VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
2542                                                                                                                                                                                   VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2543                                                                                                                                                                                   *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
2544                                                                                                                                                                                   *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2545                                                                                                                                                                                   0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
2546                                                                                                                                                                                   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
2547                                                                                                                                                                                   0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2548
2549         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2550                 poolBuilder.addType(inputBuffers[ndx]->getType());
2551
2552         if (extraDataCount > 0)
2553         {
2554                 descriptorPool = poolBuilder.build(vk, device,
2555                                                         VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2556                 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2557         }
2558
2559         for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2560         {
2561                 if (inputBuffers[buffersNdx]->isImage())
2562                 {
2563                         VkDescriptorImageInfo info =
2564                                 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2565                                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2566
2567                         updateBuilder.writeSingle(*descriptorSet,
2568                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2569                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2570                 }
2571                 else
2572                 {
2573                         VkDescriptorBufferInfo info =
2574                                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2575                                                                                 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2576
2577                         updateBuilder.writeSingle(*descriptorSet,
2578                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2579                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2580                 }
2581         }
2582
2583         updateBuilder.update(vk, device);
2584
2585         const VkQueue                                                   queue                                   = context.getUniversalQueue();
2586         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
2587         const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(vk, device, queueFamilyIndex));
2588         const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
2589         const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
2590         const vk::VkDeviceSize                                  vertexBufferSize                = 2ull * maxWidth * sizeof(tcu::Vec4);
2591         Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2592         unsigned                                                                totalIterations                 = 0u;
2593         unsigned                                                                failedIterations                = 0u;
2594         Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2595
2596         {
2597                 const Allocation&               alloc                           = vertexBuffer.getAllocation();
2598                 std::vector<tcu::Vec4>  data                            (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
2599                 const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
2600                 float                                   leftHandPosition        = -1.0f;
2601
2602                 for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
2603                 {
2604                         data[ndx][0] = leftHandPosition;
2605                         leftHandPosition += pixelSize;
2606                         data[ndx+1][0] = leftHandPosition;
2607                 }
2608
2609                 deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
2610                 flushAlloc(vk, device, alloc);
2611         }
2612
2613         const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2614         const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
2615         const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
2616         const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2617         Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2618         const VkDeviceSize                      vertexBufferOffset      = 0u;
2619
2620         for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2621         {
2622                 totalIterations++;
2623
2624                 beginCommandBuffer(vk, *cmdBuffer);
2625                 {
2626
2627                         vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2628                         vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2629
2630                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2631
2632                         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2633
2634                         if (extraDataCount > 0)
2635                         {
2636                                 vk.cmdBindDescriptorSets(*cmdBuffer,
2637                                         VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2638                                         &descriptorSet.get(), 0u, DE_NULL);
2639                         }
2640
2641                         vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2642                         vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
2643
2644                         endRenderPass(vk, *cmdBuffer);
2645
2646                         copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2647                         endCommandBuffer(vk, *cmdBuffer);
2648
2649                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2650                 }
2651
2652                 {
2653                         const Allocation& allocResult = imageBufferResult.getAllocation();
2654                         invalidateAlloc(vk, device, allocResult);
2655
2656                         std::vector<const void*> datas;
2657                         datas.push_back(allocResult.getHostPtr());
2658                         if (!checkResult(internalData, datas, width/2u, subgroupSize))
2659                                 failedIterations++;
2660                 }
2661         }
2662
2663         if (0 < failedIterations)
2664         {
2665                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2666
2667                 context.getTestContext().getLog()
2668                                 << TestLog::Message << valuesPassed << " / "
2669                                 << totalIterations << " values passed" << TestLog::EndMessage;
2670                 return tcu::TestStatus::fail("Failed!");
2671         }
2672
2673         return tcu::TestStatus::pass("OK");
2674 }
2675
2676 bool vkt::subgroups::check(std::vector<const void*> datas,
2677         deUint32 width, deUint32 ref)
2678 {
2679         const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2680
2681         for (deUint32 n = 0; n < width; ++n)
2682         {
2683                 if (data[n] != ref)
2684                 {
2685                         return false;
2686                 }
2687         }
2688
2689         return true;
2690 }
2691
2692 bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
2693         const deUint32 numWorkgroups[3], const deUint32 localSize[3],
2694         deUint32 ref)
2695 {
2696         const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2697         const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2698         const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2699
2700         return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2701 }
2702
2703 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
2704         Context& context, VkFormat format, SSBOData* extraData,
2705         deUint32 extraDataCount, const void* internalData,
2706         bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2707 {
2708         return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult,
2709                                                                                                                    0u, 0u);
2710 }
2711
2712 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(
2713         Context& context, VkFormat format, SSBOData* extraData,
2714         deUint32 extraDataCount, const void* internalData,
2715         bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2716         const deUint32 geometryShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
2717 {
2718         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
2719         const VkDevice                                                  device                                  = context.getDevice();
2720         const deUint32                                                  maxWidth                                = getMaxWidth();
2721         vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
2722         DescriptorSetLayoutBuilder                              layoutBuilder;
2723         DescriptorPoolBuilder                                   poolBuilder;
2724         DescriptorSetUpdateBuilder                              updateBuilder;
2725         Move <VkDescriptorPool>                                 descriptorPool;
2726         Move <VkDescriptorSet>                                  descriptorSet;
2727
2728         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2729         const Unique<VkShaderModule>                    geometryShaderModule    (createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2730         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2731         const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
2732         const VkVertexInputBindingDescription   vertexInputBinding              =
2733         {
2734                 0u,                                                                                     // binding;
2735                 static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
2736                 VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
2737         };
2738
2739         const VkVertexInputAttributeDescription vertexInputAttribute    =
2740         {
2741                 0u,
2742                 0u,
2743                 VK_FORMAT_R32G32B32A32_SFLOAT,
2744                 0u
2745         };
2746
2747         for (deUint32 i = 0u; i < extraDataCount; i++)
2748         {
2749                 if (extraData[i].isImage)
2750                 {
2751                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2752                 }
2753                 else
2754                 {
2755                         vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2756                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2757                 }
2758                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2759                 initializeMemory(context, alloc, extraData[i]);
2760         }
2761
2762         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2763                 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2764
2765         const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(vk, device));
2766
2767         const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(vk, device, *descriptorSetLayout));
2768
2769         const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};
2770
2771         const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
2772                                                                                                                                                                                   VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
2773                                                                                                                                                                                   *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
2774                                                                                                                                                                                   *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2775                                                                                                                                                                                   0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
2776                                                                                                                                                                                   requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2777
2778         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2779                 poolBuilder.addType(inputBuffers[ndx]->getType());
2780
2781         if (extraDataCount > 0)
2782         {
2783                 descriptorPool = poolBuilder.build(vk, device,
2784                                                         VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2785                 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2786         }
2787
2788         for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2789         {
2790                 if (inputBuffers[buffersNdx]->isImage())
2791                 {
2792                         VkDescriptorImageInfo info =
2793                                 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2794                                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2795
2796                         updateBuilder.writeSingle(*descriptorSet,
2797                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2798                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2799                 }
2800                 else
2801                 {
2802                         VkDescriptorBufferInfo info =
2803                                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2804                                                                                 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2805
2806                         updateBuilder.writeSingle(*descriptorSet,
2807                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2808                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2809                 }
2810         }
2811
2812         updateBuilder.update(vk, device);
2813
2814         const VkQueue                                                   queue                                   = context.getUniversalQueue();
2815         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
2816         const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(vk, device, queueFamilyIndex));
2817         const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
2818         const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
2819         const vk::VkDeviceSize                                  vertexBufferSize                = maxWidth * sizeof(tcu::Vec4);
2820         Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2821         unsigned                                                                totalIterations                 = 0u;
2822         unsigned                                                                failedIterations                = 0u;
2823         Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2824
2825         {
2826                 const Allocation&               alloc                           = vertexBuffer.getAllocation();
2827                 std::vector<tcu::Vec4>  data                            (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2828                 const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
2829                 float                                   leftHandPosition        = -1.0f;
2830
2831                 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2832                 {
2833                         data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2834                         leftHandPosition += pixelSize;
2835                 }
2836
2837                 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2838                 flushAlloc(vk, device, alloc);
2839         }
2840
2841         const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2842         const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
2843         const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
2844         const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2845         Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2846         const VkDeviceSize                      vertexBufferOffset      = 0u;
2847
2848         for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2849         {
2850                 totalIterations++;
2851
2852                 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2853                 {
2854                         const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2855                         initializeMemory(context, alloc, extraData[ndx]);
2856                 }
2857
2858                 beginCommandBuffer(vk, *cmdBuffer);
2859                 {
2860                         vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2861
2862                         vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2863
2864                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2865
2866                         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2867
2868                         if (extraDataCount > 0)
2869                         {
2870                                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2871                                         &descriptorSet.get(), 0u, DE_NULL);
2872                         }
2873
2874                         vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2875
2876                         vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2877
2878                         endRenderPass(vk, *cmdBuffer);
2879
2880                         copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2881
2882                         endCommandBuffer(vk, *cmdBuffer);
2883
2884                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2885                 }
2886
2887                 {
2888                         const Allocation& allocResult = imageBufferResult.getAllocation();
2889                         invalidateAlloc(vk, device, allocResult);
2890
2891                         std::vector<const void*> datas;
2892                         datas.push_back(allocResult.getHostPtr());
2893                         if (!checkResult(internalData, datas, width, subgroupSize))
2894                                 failedIterations++;
2895                 }
2896         }
2897
2898         if (0 < failedIterations)
2899         {
2900                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2901
2902                 context.getTestContext().getLog()
2903                                 << TestLog::Message << valuesPassed << " / "
2904                                 << totalIterations << " values passed" << TestLog::EndMessage;
2905
2906                 return tcu::TestStatus::fail("Failed!");
2907         }
2908
2909         return tcu::TestStatus::pass("OK");
2910 }
2911
2912 tcu::TestStatus vkt::subgroups::allStages(
2913         Context& context, VkFormat format, SSBOData* extraData,
2914         deUint32 extraDataCount, const void* internalData,
2915         const VerificationFunctor& checkResult,
2916         const vk::VkShaderStageFlags shaderStage)
2917 {
2918         return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
2919                                                                                                                  0u, 0u, 0u, 0u, 0u, DE_NULL);
2920 }
2921
2922 tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize(
2923         Context& context, VkFormat format, SSBOData* extraDatas,
2924         deUint32 extraDatasCount, const void* internalData,
2925         const VerificationFunctor& checkResult,
2926         const VkShaderStageFlags shaderStageTested,
2927         const deUint32 vertexShaderStageCreateFlags,
2928         const deUint32 tessellationControlShaderStageCreateFlags,
2929         const deUint32 tessellationEvalShaderStageCreateFlags,
2930         const deUint32 geometryShaderStageCreateFlags,
2931         const deUint32 fragmentShaderStageCreateFlags,
2932         const deUint32 requiredSubgroupSize[5])
2933 {
2934         const DeviceInterface&                  vk                                      = context.getDeviceInterface();
2935         const VkDevice                                  device                          = context.getDevice();
2936         const deUint32                                  maxWidth                        = getMaxWidth();
2937         vector<VkShaderStageFlagBits>   stagesVector;
2938         VkShaderStageFlags                              shaderStageRequired     = (VkShaderStageFlags)0ull;
2939
2940         Move<VkShaderModule>                    vertexShaderModule;
2941         Move<VkShaderModule>                    teCtrlShaderModule;
2942         Move<VkShaderModule>                    teEvalShaderModule;
2943         Move<VkShaderModule>                    geometryShaderModule;
2944         Move<VkShaderModule>                    fragmentShaderModule;
2945
2946         if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
2947         {
2948                 stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
2949         }
2950         if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
2951         {
2952                 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
2953                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
2954                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
2955         }
2956         if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2957         {
2958                 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
2959                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
2960                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2961         }
2962         if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
2963         {
2964                 stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
2965                 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
2966                 shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
2967         }
2968         if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
2969         {
2970                 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
2971                 shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
2972         }
2973
2974         const deUint32  stagesCount     = static_cast<deUint32>(stagesVector.size());
2975         const string    vert            = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)                                    ? "vert_noSubgroup"             : "vert";
2976         const string    tesc            = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)              ? "tesc_noSubgroup"             : "tesc";
2977         const string    tese            = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)   ? "tese_noSubgroup"             : "tese";
2978
2979         shaderStageRequired = shaderStageTested | shaderStageRequired;
2980
2981         vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
2982         if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
2983         {
2984                 teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
2985                 teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
2986         }
2987         if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
2988         {
2989                 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2990                 {
2991                         // tessellation shaders output line primitives
2992                         geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
2993                 }
2994                 else
2995                 {
2996                         // otherwise points are processed by geometry shader
2997                         geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
2998                 }
2999         }
3000         if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
3001                 fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
3002
3003         std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
3004
3005         DescriptorSetLayoutBuilder layoutBuilder;
3006         // The implicit result SSBO we use to store our outputs from the shader
3007         for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3008         {
3009                 const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
3010                 const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
3011                 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3012
3013                 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
3014         }
3015
3016         for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3017         {
3018                 const deUint32 datasNdx = ndx - stagesCount;
3019                 if (extraDatas[datasNdx].isImage)
3020                 {
3021                         inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
3022                 }
3023                 else
3024                 {
3025                         const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
3026                         inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3027                 }
3028
3029                 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3030                 initializeMemory(context, alloc, extraDatas[datasNdx]);
3031
3032                 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
3033                                                                 extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
3034         }
3035
3036         const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3037
3038         const Unique<VkPipelineLayout> pipelineLayout(
3039                 makePipelineLayout(vk, device, *descriptorSetLayout));
3040
3041         const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3042         const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3043                                                                                                                    shaderStageRequired,
3044                                                                                                                    *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
3045                                                                                                                    *renderPass,
3046                                                                                                                    (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3047                                                                                                                    DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
3048                                                                                                                    vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
3049                                                                                                                    geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
3050
3051         Move <VkDescriptorPool> descriptorPool;
3052         Move <VkDescriptorSet>  descriptorSet;
3053
3054         if (inputBuffers.size() > 0)
3055         {
3056                 DescriptorPoolBuilder poolBuilder;
3057
3058                 for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
3059                 {
3060                         poolBuilder.addType(inputBuffers[ndx]->getType());
3061                 }
3062
3063                 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3064
3065                 // Create descriptor set
3066                 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3067
3068                 DescriptorSetUpdateBuilder updateBuilder;
3069
3070                 for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
3071                 {
3072                         deUint32 binding;
3073                         if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
3074                         else binding = extraDatas[ndx -stagesCount].binding;
3075
3076                         if (inputBuffers[ndx]->isImage())
3077                         {
3078                                 VkDescriptorImageInfo info =
3079                                         makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
3080                                                                                         inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3081
3082                                 updateBuilder.writeSingle(      *descriptorSet,
3083                                                                                         DescriptorSetUpdateBuilder::Location::binding(binding),
3084                                                                                         inputBuffers[ndx]->getType(), &info);
3085                         }
3086                         else
3087                         {
3088                                 VkDescriptorBufferInfo info =
3089                                         makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
3090                                                         0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
3091
3092                                 updateBuilder.writeSingle(      *descriptorSet,
3093                                                                                                         DescriptorSetUpdateBuilder::Location::binding(binding),
3094                                                                                                         inputBuffers[ndx]->getType(), &info);
3095                         }
3096                 }
3097
3098                 updateBuilder.update(vk, device);
3099         }
3100
3101         {
3102                 const VkQueue                                   queue                                   = context.getUniversalQueue();
3103                 const deUint32                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
3104                 const Unique<VkCommandPool>             cmdPool                                 (makeCommandPool(vk, device, queueFamilyIndex));
3105                 const deUint32                                  subgroupSize                    = getSubgroupSize(context);
3106                 const Unique<VkCommandBuffer>   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
3107                 unsigned                                                totalIterations                 = 0u;
3108                 unsigned                                                failedIterations                = 0u;
3109                 Image                                                   resultImage                             (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3110                 const Unique<VkFramebuffer>             framebuffer                             (makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
3111                 const VkViewport                                viewport                                = makeViewport(maxWidth, 1u);
3112                 const VkRect2D                                  scissor                                 = makeRect2D(maxWidth, 1u);
3113                 const vk::VkDeviceSize                  imageResultSize                 = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3114                 Buffer                                                  imageBufferResult               (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3115                 const VkImageSubresourceRange   subresourceRange                =
3116                 {
3117                         VK_IMAGE_ASPECT_COLOR_BIT,                                                                                      //VkImageAspectFlags    aspectMask
3118                         0u,                                                                                                                                     //deUint32                              baseMipLevel
3119                         1u,                                                                                                                                     //deUint32                              levelCount
3120                         0u,                                                                                                                                     //deUint32                              baseArrayLayer
3121                         1u                                                                                                                                      //deUint32                              layerCount
3122                 };
3123
3124                 const VkImageMemoryBarrier              colorAttachmentBarrier  = makeImageMemoryBarrier(
3125                         (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3126                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
3127                         resultImage.getImage(), subresourceRange);
3128
3129                 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3130                 {
3131                         for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3132                         {
3133                                 // re-init the data
3134                                 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3135                                 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
3136                         }
3137
3138                         totalIterations++;
3139
3140                         beginCommandBuffer(vk, *cmdBuffer);
3141
3142                         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
3143
3144                         vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3145
3146                         vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3147
3148                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3149
3150                         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3151
3152                         if (stagesCount + extraDatasCount > 0)
3153                                 vk.cmdBindDescriptorSets(*cmdBuffer,
3154                                                 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3155                                                 &descriptorSet.get(), 0u, DE_NULL);
3156
3157                         vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
3158
3159                         endRenderPass(vk, *cmdBuffer);
3160
3161                         copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3162
3163                         endCommandBuffer(vk, *cmdBuffer);
3164
3165                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3166
3167                         for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3168                         {
3169                                 std::vector<const void*> datas;
3170                                 if (!inputBuffers[ndx]->isImage())
3171                                 {
3172                                         const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
3173                                         invalidateAlloc(vk, device, resultAlloc);
3174                                         // we always have our result data first
3175                                         datas.push_back(resultAlloc.getHostPtr());
3176                                 }
3177
3178                                 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3179                                 {
3180                                         const deUint32 datasNdx = index - stagesCount;
3181                                         if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3182                                         {
3183                                                 const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
3184                                                 invalidateAlloc(vk, device, resultAlloc);
3185                                                 // extra data buffers used by this stage follow the result data
3186                                                 datas.push_back(resultAlloc.getHostPtr());
3187                                         }
3188                                 }
3189
3190                                 // Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
3191                                 const bool              multiCall       = (     stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT                                         ||
3192                                                                                                 stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT           ||
3193                                                                                                 stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT        ||
3194                                                                                                 stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT                                       );
3195                                 const deUint32  usedWidth       = ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);
3196
3197                                 if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
3198                                         failedIterations++;
3199                         }
3200                         if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3201                         {
3202                                 std::vector<const void*> datas;
3203                                 const Allocation& resultAlloc = imageBufferResult.getAllocation();
3204                                 invalidateAlloc(vk, device, resultAlloc);
3205
3206                                 // we always have our result data first
3207                                 datas.push_back(resultAlloc.getHostPtr());
3208
3209                                 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3210                                 {
3211                                         const deUint32 datasNdx = index - stagesCount;
3212                                         if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
3213                                         {
3214                                                 const Allocation& alloc = inputBuffers[index]->getAllocation();
3215                                                 invalidateAlloc(vk, device, alloc);
3216                                                 // extra data buffers used by the fragment stage follow the result data
3217                                                 datas.push_back(alloc.getHostPtr());
3218                                         }
3219                                 }
3220
3221                                 if (!checkResult(internalData, datas, width, subgroupSize, false))
3222                                         failedIterations++;
3223                         }
3224
3225                         vk.resetCommandBuffer(*cmdBuffer, 0);
3226                 }
3227
3228                 if (0 < failedIterations)
3229                 {
3230                         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3231
3232                         context.getTestContext().getLog()
3233                                 << TestLog::Message << valuesPassed << " / "
3234                                 << totalIterations << " values passed" << TestLog::EndMessage;
3235
3236                         return tcu::TestStatus::fail("Failed!");
3237                 }
3238         }
3239
3240         return tcu::TestStatus::pass("OK");
3241 }
3242
3243 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
3244         SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
3245         bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
3246 {
3247         return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult,
3248                                                                                                                  0u, 0u);
3249 }
3250
3251 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(Context& context, vk::VkFormat format,
3252         SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
3253         bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
3254         const deUint32 vertexShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
3255 {
3256         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
3257         const VkDevice                                                  device                                  = context.getDevice();
3258         const VkQueue                                                   queue                                   = context.getUniversalQueue();
3259         const deUint32                                                  maxWidth                                = getMaxWidth();
3260         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
3261         vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
3262         DescriptorSetLayoutBuilder                              layoutBuilder;
3263         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3264         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3265         const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
3266
3267         const VkVertexInputBindingDescription   vertexInputBinding              =
3268         {
3269                 0u,                                                                                     // binding;
3270                 static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
3271                 VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
3272         };
3273
3274         const VkVertexInputAttributeDescription vertexInputAttribute    =
3275         {
3276                 0u,
3277                 0u,
3278                 VK_FORMAT_R32G32B32A32_SFLOAT,
3279                 0u
3280         };
3281
3282         for (deUint32 i = 0u; i < extraDataCount; i++)
3283         {
3284                 if (extraData[i].isImage)
3285                 {
3286                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
3287                 }
3288                 else
3289                 {
3290                         vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
3291                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3292                 }
3293                 const Allocation& alloc = inputBuffers[i]->getAllocation();
3294                 initializeMemory(context, alloc, extraData[i]);
3295         }
3296
3297         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3298                 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
3299
3300         const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(vk, device));
3301
3302         const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(vk, device, *descriptorSetLayout));
3303
3304         const deUint32 requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
3305         const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
3306                                                                                                                                                                                   VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3307                                                                                                                                                                                   *vertexShaderModule, *fragmentShaderModule,
3308                                                                                                                                                                                   DE_NULL, DE_NULL, DE_NULL,
3309                                                                                                                                                                                   *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3310                                                                                                                                                                                   &vertexInputBinding, &vertexInputAttribute, true, format,
3311                                                                                                                                                                                   vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
3312                                                                                                                                                                                   requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3313         DescriptorPoolBuilder                                   poolBuilder;
3314         DescriptorSetUpdateBuilder                              updateBuilder;
3315
3316
3317         for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3318                 poolBuilder.addType(inputBuffers[ndx]->getType());
3319
3320         Move <VkDescriptorPool>                                 descriptorPool;
3321         Move <VkDescriptorSet>                                  descriptorSet;
3322
3323         if (extraDataCount > 0)
3324         {
3325                 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3326                 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3327         }
3328
3329         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3330         {
3331                 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3332                 initializeMemory(context, alloc, extraData[ndx]);
3333         }
3334
3335         for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
3336         {
3337                 if (inputBuffers[buffersNdx]->isImage())
3338                 {
3339                         VkDescriptorImageInfo info =
3340                                 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
3341                                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3342
3343                         updateBuilder.writeSingle(*descriptorSet,
3344                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3345                                                                                 inputBuffers[buffersNdx]->getType(), &info);
3346                 }
3347                 else
3348                 {
3349                         VkDescriptorBufferInfo info =
3350                                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
3351                                                                                 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3352
3353                         updateBuilder.writeSingle(*descriptorSet,
3354                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3355                                                                                 inputBuffers[buffersNdx]->getType(), &info);
3356                 }
3357         }
3358         updateBuilder.update(vk, device);
3359
3360         const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(vk, device, queueFamilyIndex));
3361
3362         const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
3363
3364         const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
3365
3366         const vk::VkDeviceSize                                  vertexBufferSize                = maxWidth * sizeof(tcu::Vec4);
3367         Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3368
3369         unsigned                                                                totalIterations                 = 0u;
3370         unsigned                                                                failedIterations                = 0u;
3371
3372         Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3373
3374         {
3375                 const Allocation&               alloc                           = vertexBuffer.getAllocation();
3376                 std::vector<tcu::Vec4>  data                            (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3377                 const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
3378                 float                                   leftHandPosition        = -1.0f;
3379
3380                 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3381                 {
3382                         data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3383                         leftHandPosition += pixelSize;
3384                 }
3385
3386                 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3387                 flushAlloc(vk, device, alloc);
3388         }
3389
3390         const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3391         const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
3392         const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
3393         const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3394         Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3395         const VkDeviceSize                      vertexBufferOffset      = 0u;
3396
3397         for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3398         {
3399                 totalIterations++;
3400
3401                 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3402                 {
3403                         const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3404                         initializeMemory(context, alloc, extraData[ndx]);
3405                 }
3406
3407                 beginCommandBuffer(vk, *cmdBuffer);
3408                 {
3409                         vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3410
3411                         vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3412
3413                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3414
3415                         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3416
3417                         if (extraDataCount > 0)
3418                         {
3419                                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3420                                         &descriptorSet.get(), 0u, DE_NULL);
3421                         }
3422
3423                         vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3424
3425                         vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3426
3427                         endRenderPass(vk, *cmdBuffer);
3428
3429                         copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3430
3431                         endCommandBuffer(vk, *cmdBuffer);
3432
3433                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3434                 }
3435
3436                 {
3437                         const Allocation& allocResult = imageBufferResult.getAllocation();
3438                         invalidateAlloc(vk, device, allocResult);
3439
3440                         std::vector<const void*> datas;
3441                         datas.push_back(allocResult.getHostPtr());
3442                         if (!checkResult(internalData, datas, width, subgroupSize))
3443                                 failedIterations++;
3444                 }
3445         }
3446
3447         if (0 < failedIterations)
3448         {
3449                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3450
3451                 context.getTestContext().getLog()
3452                         << TestLog::Message << valuesPassed << " / "
3453                         << totalIterations << " values passed" << TestLog::EndMessage;
3454
3455                 return tcu::TestStatus::fail("Failed!");
3456         }
3457
3458         return tcu::TestStatus::pass("OK");
3459 }
3460
3461 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest(
3462         Context& context, VkFormat format, SSBOData* extraDatas,
3463         deUint32 extraDatasCount, const void* internalData,
3464         bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width,
3465                                                 deUint32 height, deUint32 subgroupSize))
3466 {
3467         return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult,
3468                                                                                                                    0u, 0u);
3469 }
3470
3471 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(
3472         Context& context, VkFormat format, SSBOData* extraDatas,
3473         deUint32 extraDatasCount, const void* internalData,
3474         bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width,
3475                                                 deUint32 height, deUint32 subgroupSize),
3476         const deUint32 fragmentShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
3477 {
3478         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
3479         const VkDevice                                                  device                                  = context.getDevice();
3480         const VkQueue                                                   queue                                   = context.getUniversalQueue();
3481         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
3482         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule
3483                                                                                                                                                 (vk, device, context.getBinaryCollection().get("vert"), 0u));
3484         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule
3485                                                                                                                                                 (vk, device, context.getBinaryCollection().get("fragment"), 0u));
3486
3487         std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
3488
3489         for (deUint32 i = 0; i < extraDatasCount; i++)
3490         {
3491                 if (extraDatas[i].isImage)
3492                 {
3493                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3494                                                                                 static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3495                 }
3496                 else
3497                 {
3498                         vk::VkDeviceSize size =
3499                                 getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3500                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3501                 }
3502
3503                 const Allocation& alloc = inputBuffers[i]->getAllocation();
3504                 initializeMemory(context, alloc, extraDatas[i]);
3505         }
3506
3507         DescriptorSetLayoutBuilder layoutBuilder;
3508
3509         for (deUint32 i = 0; i < extraDatasCount; i++)
3510         {
3511                 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
3512                                                                  VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3513         }
3514
3515         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3516                 layoutBuilder.build(vk, device));
3517
3518         const Unique<VkPipelineLayout> pipelineLayout(
3519                 makePipelineLayout(vk, device, *descriptorSetLayout));
3520
3521         const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3522
3523         const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3524         const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3525                                                                                                                    VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3526                                                                                                                    *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3527                                                                                                                    DE_NULL, DE_NULL, true, VK_FORMAT_R32G32B32A32_SFLOAT,
3528                                                                                                                    0u, 0u, 0u, 0u, fragmentShaderStageCreateFlags, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3529
3530         DescriptorPoolBuilder poolBuilder;
3531
3532         // To stop validation complaining, always add at least one type to pool.
3533         poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3534         for (deUint32 i = 0; i < extraDatasCount; i++)
3535         {
3536                 poolBuilder.addType(inputBuffers[i]->getType());
3537         }
3538
3539         Move<VkDescriptorPool> descriptorPool;
3540         // Create descriptor set
3541         Move<VkDescriptorSet> descriptorSet;
3542
3543         if (extraDatasCount > 0)
3544         {
3545                 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3546
3547                 descriptorSet   = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3548         }
3549
3550         DescriptorSetUpdateBuilder updateBuilder;
3551
3552         for (deUint32 i = 0; i < extraDatasCount; i++)
3553         {
3554                 if (inputBuffers[i]->isImage())
3555                 {
3556                         VkDescriptorImageInfo info =
3557                                 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3558                                                                                 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3559
3560                         updateBuilder.writeSingle(*descriptorSet,
3561                                                                           DescriptorSetUpdateBuilder::Location::binding(i),
3562                                                                           inputBuffers[i]->getType(), &info);
3563                 }
3564                 else
3565                 {
3566                         VkDescriptorBufferInfo info =
3567                                 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
3568                                                                                  0ull, inputBuffers[i]->getAsBuffer()->getSize());
3569
3570                         updateBuilder.writeSingle(*descriptorSet,
3571                                                                           DescriptorSetUpdateBuilder::Location::binding(i),
3572                                                                           inputBuffers[i]->getType(), &info);
3573                 }
3574         }
3575
3576         if (extraDatasCount > 0)
3577                 updateBuilder.update(vk, device);
3578
3579         const Unique<VkCommandPool>             cmdPool                         (makeCommandPool(vk, device, queueFamilyIndex));
3580
3581         const deUint32                                  subgroupSize            = getSubgroupSize(context);
3582
3583         const Unique<VkCommandBuffer>   cmdBuffer                       (makeCommandBuffer(context, *cmdPool));
3584
3585         unsigned totalIterations = 0;
3586         unsigned failedIterations = 0;
3587
3588         for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3589         {
3590                 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3591                 {
3592                         totalIterations++;
3593
3594                         // re-init the data
3595                         for (deUint32 i = 0; i < extraDatasCount; i++)
3596                         {
3597                                 const Allocation& alloc = inputBuffers[i]->getAllocation();
3598                                 initializeMemory(context, alloc, extraDatas[i]);
3599                         }
3600
3601                         VkDeviceSize formatSize = getFormatSizeInBytes(format);
3602                         const VkDeviceSize resultImageSizeInBytes =
3603                                 width * height * formatSize;
3604
3605                         Image resultImage(context, width, height, format,
3606                                                           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
3607                                                           VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3608
3609                         Buffer resultBuffer(context, resultImageSizeInBytes,
3610                                                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3611
3612                         const Unique<VkFramebuffer> framebuffer(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3613
3614                         beginCommandBuffer(vk, *cmdBuffer);
3615
3616                         VkViewport viewport = makeViewport(width, height);
3617
3618                         vk.cmdSetViewport(
3619                                 *cmdBuffer, 0, 1, &viewport);
3620
3621                         VkRect2D scissor = {{0, 0}, {width, height}};
3622
3623                         vk.cmdSetScissor(
3624                                 *cmdBuffer, 0, 1, &scissor);
3625
3626                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3627
3628                         vk.cmdBindPipeline(
3629                                 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3630
3631                         if (extraDatasCount > 0)
3632                         {
3633                                 vk.cmdBindDescriptorSets(*cmdBuffer,
3634                                                 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3635                                                 &descriptorSet.get(), 0u, DE_NULL);
3636                         }
3637
3638                         vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3639
3640                         endRenderPass(vk, *cmdBuffer);
3641
3642                         copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3643
3644                         endCommandBuffer(vk, *cmdBuffer);
3645
3646                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3647
3648                         std::vector<const void*> datas;
3649                         {
3650                                 const Allocation& resultAlloc = resultBuffer.getAllocation();
3651                                 invalidateAlloc(vk, device, resultAlloc);
3652
3653                                 // we always have our result data first
3654                                 datas.push_back(resultAlloc.getHostPtr());
3655                         }
3656
3657                         if (!checkResult(internalData, datas, width, height, subgroupSize))
3658                         {
3659                                 failedIterations++;
3660                         }
3661
3662                         vk.resetCommandBuffer(*cmdBuffer, 0);
3663                 }
3664         }
3665
3666         if (0 < failedIterations)
3667         {
3668                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3669
3670                 context.getTestContext().getLog()
3671                         << TestLog::Message << valuesPassed << " / "
3672                         << totalIterations << " values passed" << TestLog::EndMessage;
3673
3674                 return tcu::TestStatus::fail("Failed!");
3675         }
3676
3677         return tcu::TestStatus::pass("OK");
3678 }
3679
3680 Move<VkPipeline> makeComputePipeline(Context& context,
3681                                                                          const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
3682                                                                          const deUint32 pipelineShaderStageFlags, const deUint32 pipelineCreateFlags, VkPipeline basePipelineHandle,
3683                                                                          deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ, deUint32 requiredSubgroupSize)
3684 {
3685         const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
3686
3687         const vk::VkSpecializationMapEntry entries[3] =
3688         {
3689                 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
3690                 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
3691                 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3692         };
3693
3694         const vk::VkSpecializationInfo info =
3695         {
3696                 /* mapEntryCount = */ 3,
3697                 /* pMapEntries   = */ entries,
3698                 /* dataSize      = */ sizeof(localSize),
3699                 /* pData         = */ localSize
3700         };
3701
3702         const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo =
3703         {
3704                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType    sType;
3705                 DE_NULL,                                                                                                                                                // void*              pNext;
3706                 requiredSubgroupSize                                                                                                                    // uint32_t           requiredSubgroupSize;
3707         };
3708
3709         const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
3710         {
3711                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,                            // VkStructureType                                      sType;
3712                 (requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL),       // const void*                                          pNext;
3713                 pipelineShaderStageFlags,                                                                                       // VkPipelineShaderStageCreateFlags     flags;
3714                 VK_SHADER_STAGE_COMPUTE_BIT,                                                                            // VkShaderStageFlagBits                        stage;
3715                 shaderModule,                                                                                                           // VkShaderModule                                       module;
3716                 "main",                                                                                                                         // const char*                                          pName;
3717                 &info,                                                                                                                          // const VkSpecializationInfo*          pSpecializationInfo;
3718         };
3719
3720         const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
3721         {
3722                 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType      sType;
3723                 DE_NULL,                                                                                // const void*                                          pNext;
3724                 pipelineCreateFlags,                                                    // VkPipelineCreateFlags                        flags;
3725                 pipelineShaderStageParams,                                              // VkPipelineShaderStageCreateInfo      stage;
3726                 pipelineLayout,                                                                 // VkPipelineLayout                                     layout;
3727                 basePipelineHandle,                                                             // VkPipeline                                           basePipelineHandle;
3728                 -1,                                                                                             // deInt32                                                      basePipelineIndex;
3729         };
3730
3731         return createComputePipeline(context.getDeviceInterface(),
3732                                                                  context.getDevice(), DE_NULL, &pipelineCreateInfo);
3733 }
3734
3735 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize(
3736         Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount, const void* internalData,
3737         bool (*checkResult)(const void* internalData, std::vector<const void*> datas,
3738                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
3739                                                 deUint32 subgroupSize),
3740         const deUint32 pipelineShaderStageCreateFlags, const deUint32 numWorkgroups[3],
3741         const deBool isRequiredSubgroupSize, const deUint32 subgroupSize, const deUint32 localSizesToTest[][3], const deUint32 localSizesToTestCount)
3742 {
3743         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
3744         const VkDevice                                                  device                                  = context.getDevice();
3745         const VkQueue                                                   queue                                   = context.getUniversalQueue();
3746         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
3747         VkDeviceSize                                                    elementSize                             = getFormatSizeInBytes(format);
3748
3749         VkDeviceSize maxSubgroupSize = maxSupportedSubgroupSize();
3750
3751         if (isRequiredSubgroupSize)
3752         {
3753                 VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
3754                 subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
3755                 subgroupSizeControlProperties.pNext = DE_NULL;
3756
3757                 VkPhysicalDeviceProperties2 properties2;
3758                 properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
3759                 properties2.pNext = &subgroupSizeControlProperties;
3760                 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties2);
3761                 maxSubgroupSize = deMax32(subgroupSizeControlProperties.maxSubgroupSize, static_cast<deUint32>(maxSubgroupSize));
3762         }
3763
3764         const VkDeviceSize resultBufferSize = maxSubgroupSize *
3765                                                                                   maxSubgroupSize *
3766                                                                                   maxSubgroupSize;
3767
3768         const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
3769
3770         Buffer resultBuffer(
3771                 context, resultBufferSizeInBytes);
3772
3773         std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
3774
3775         for (deUint32 i = 0; i < inputsCount; i++)
3776         {
3777                 if (inputs[i].isImage)
3778                 {
3779                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3780                                                                                 static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
3781                 }
3782                 else
3783                 {
3784                         vk::VkDeviceSize size =
3785                                 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3786                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3787                 }
3788
3789                 const Allocation& alloc = inputBuffers[i]->getAllocation();
3790                 initializeMemory(context, alloc, inputs[i]);
3791         }
3792
3793         DescriptorSetLayoutBuilder layoutBuilder;
3794         layoutBuilder.addBinding(
3795                 resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3796
3797         for (deUint32 i = 0; i < inputsCount; i++)
3798         {
3799                 layoutBuilder.addBinding(
3800                         inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3801         }
3802
3803         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3804                 layoutBuilder.build(vk, device));
3805
3806         const Unique<VkShaderModule> shaderModule(
3807                 createShaderModule(vk, device,
3808                                                    context.getBinaryCollection().get("comp"), 0u));
3809         const Unique<VkPipelineLayout> pipelineLayout(
3810                 makePipelineLayout(vk, device, *descriptorSetLayout));
3811
3812         DescriptorPoolBuilder poolBuilder;
3813
3814         poolBuilder.addType(resultBuffer.getType());
3815
3816         for (deUint32 i = 0; i < inputsCount; i++)
3817         {
3818                 poolBuilder.addType(inputBuffers[i]->getType());
3819         }
3820
3821         const Unique<VkDescriptorPool> descriptorPool(
3822                 poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3823
3824         // Create descriptor set
3825         const Unique<VkDescriptorSet> descriptorSet(
3826                 makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
3827
3828         DescriptorSetUpdateBuilder updateBuilder;
3829
3830         const VkDescriptorBufferInfo resultDescriptorInfo =
3831                 makeDescriptorBufferInfo(
3832                         resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
3833
3834         updateBuilder.writeSingle(*descriptorSet,
3835                                                           DescriptorSetUpdateBuilder::Location::binding(0u),
3836                                                           VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
3837
3838         for (deUint32 i = 0; i < inputsCount; i++)
3839         {
3840                 if (inputBuffers[i]->isImage())
3841                 {
3842                         VkDescriptorImageInfo info =
3843                                 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3844                                                                                 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3845
3846                         updateBuilder.writeSingle(*descriptorSet,
3847                                                                           DescriptorSetUpdateBuilder::Location::binding(i + 1),
3848                                                                           inputBuffers[i]->getType(), &info);
3849                 }
3850                 else
3851                 {
3852                         vk::VkDeviceSize size =
3853                                 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3854                         VkDescriptorBufferInfo info =
3855                                 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
3856
3857                         updateBuilder.writeSingle(*descriptorSet,
3858                                                                           DescriptorSetUpdateBuilder::Location::binding(i + 1),
3859                                                                           inputBuffers[i]->getType(), &info);
3860                 }
3861         }
3862
3863         updateBuilder.update(vk, device);
3864
3865         const Unique<VkCommandPool>             cmdPool                         (makeCommandPool(vk, device, queueFamilyIndex));
3866
3867         unsigned totalIterations = 0;
3868         unsigned failedIterations = 0;
3869
3870         const Unique<VkCommandBuffer> cmdBuffer(
3871                 makeCommandBuffer(context, *cmdPool));
3872
3873         Move<VkPipeline> *pipelines = new Move<VkPipeline>[localSizesToTestCount - 1];
3874         pipelines[0] =
3875                 makeComputePipeline(context, *pipelineLayout, *shaderModule,
3876                                                         pipelineShaderStageCreateFlags, VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT, (VkPipeline) DE_NULL,
3877                                                         localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2],
3878                                                         isRequiredSubgroupSize ? subgroupSize : 0u);
3879
3880         for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
3881         {
3882                 const deUint32 nextX = localSizesToTest[index][0];
3883                 const deUint32 nextY = localSizesToTest[index][1];
3884                 const deUint32 nextZ = localSizesToTest[index][2];
3885
3886                 pipelines[index] =
3887                         makeComputePipeline(context, *pipelineLayout, *shaderModule,
3888                                                                 pipelineShaderStageCreateFlags, VK_PIPELINE_CREATE_DERIVATIVE_BIT, *pipelines[0],
3889                                                                 nextX, nextY, nextZ,
3890                                                                 isRequiredSubgroupSize ? subgroupSize : 0u);
3891         }
3892
3893         for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
3894         {
3895
3896                 // we are running one test
3897                 totalIterations++;
3898
3899                 beginCommandBuffer(vk, *cmdBuffer);
3900
3901                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelines[index]);
3902
3903                 vk.cmdBindDescriptorSets(*cmdBuffer,
3904                                 VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
3905                                 &descriptorSet.get(), 0u, DE_NULL);
3906
3907                 vk.cmdDispatch(*cmdBuffer,numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
3908
3909                 endCommandBuffer(vk, *cmdBuffer);
3910
3911                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3912
3913                 std::vector<const void*> datas;
3914
3915                 {
3916                         const Allocation& resultAlloc = resultBuffer.getAllocation();
3917                         invalidateAlloc(vk, device, resultAlloc);
3918
3919                         // we always have our result data first
3920                         datas.push_back(resultAlloc.getHostPtr());
3921                 }
3922
3923                 for (deUint32 i = 0; i < inputsCount; i++)
3924                 {
3925                         if (!inputBuffers[i]->isImage())
3926                         {
3927                                 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
3928                                 invalidateAlloc(vk, device, resultAlloc);
3929
3930                                 // we always have our result data first
3931                                 datas.push_back(resultAlloc.getHostPtr());
3932                         }
3933                 }
3934
3935                 if (!checkResult(internalData, datas, numWorkgroups, localSizesToTest[index], subgroupSize))
3936                 {
3937                         failedIterations++;
3938                 }
3939
3940                 vk.resetCommandBuffer(*cmdBuffer, 0);
3941         }
3942
3943         delete[] pipelines;
3944
3945         if (0 < failedIterations)
3946         {
3947                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3948
3949                 context.getTestContext().getLog()
3950                         << TestLog::Message << valuesPassed << " / "
3951                         << totalIterations << " values passed" << TestLog::EndMessage;
3952
3953                 return tcu::TestStatus::fail("Failed!");
3954         }
3955
3956         return tcu::TestStatus::pass("OK");
3957 }
3958
3959 tcu::TestStatus vkt::subgroups::makeComputeTest(
3960         Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount, const void* internalData,
3961         bool (*checkResult)(const void* internalData, std::vector<const void*> datas,
3962                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
3963                                                 deUint32 subgroupSize),
3964         deUint32 requiredSubgroupSize, const deUint32 pipelineShaderStageCreateFlags)
3965 {
3966         const deUint32 numWorkgroups[3] = {4, 2, 2};
3967         deUint32 subgroupSize = requiredSubgroupSize;
3968
3969         if(requiredSubgroupSize == 0)
3970                 subgroupSize = vkt::subgroups::getSubgroupSize(context);
3971
3972         const deUint32 localSizesToTestCount = 8;
3973         deUint32 localSizesToTest[localSizesToTestCount][3] =
3974         {
3975                 {1, 1, 1},
3976                 {subgroupSize, 1, 1},
3977                 {1, subgroupSize, 1},
3978                 {1, 1, subgroupSize},
3979                 {32, 4, 1},
3980                 {1, 4, 32},
3981                 {3, 5, 7},
3982                 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
3983         };
3984
3985         return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
3986                                                                                            numWorkgroups, requiredSubgroupSize != 0u, subgroupSize, localSizesToTest, localSizesToTestCount);
3987 }