Merge vk-gl-cts/vulkan-cts-1.1.6 into vk-gl-cts/master
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / subgroups / vktSubgroupsTestsUtils.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Google Inc.
7  * Copyright (c) 2017 Codeplay Software Ltd.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests Utils
24  */ /*--------------------------------------------------------------------*/
25
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "deFloat16.h"
28 #include "deRandom.hpp"
29 #include "tcuCommandLine.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "vkBarrierUtil.hpp"
32 #include "vkImageUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkObjUtil.hpp"
36 using namespace tcu;
37 using namespace std;
38 using namespace vk;
39 using namespace vkt;
40
41 namespace
42 {
43
44 deUint32 getMaxWidth ()
45 {
46         return 1024u;
47 }
48
49 deUint32 getNextWidth (const deUint32 width)
50 {
51         if (width < 128)
52         {
53                 // This ensures we test every value up to 128 (the max subgroup size).
54                 return width + 1;
55         }
56         else
57         {
58                 // And once we hit 128 we increment to only power of 2's to reduce testing time.
59                 return width * 2;
60         }
61 }
62
63 deUint32 getFormatSizeInBytes(const VkFormat format)
64 {
65         switch (format)
66         {
67                 default:
68                         DE_FATAL("Unhandled format!");
69                         return 0;
70                 case VK_FORMAT_R8_SINT:
71                 case VK_FORMAT_R8_UINT:
72                         return static_cast<deUint32>(sizeof(deInt8));
73                 case VK_FORMAT_R8G8_SINT:
74                 case VK_FORMAT_R8G8_UINT:
75                         return static_cast<deUint32>(sizeof(deInt8) * 2);
76                 case VK_FORMAT_R8G8B8_SINT:
77                 case VK_FORMAT_R8G8B8_UINT:
78                 case VK_FORMAT_R8G8B8A8_SINT:
79                 case VK_FORMAT_R8G8B8A8_UINT:
80                         return static_cast<deUint32>(sizeof(deInt8) * 4);
81                 case VK_FORMAT_R16_SINT:
82                 case VK_FORMAT_R16_UINT:
83                 case VK_FORMAT_R16_SFLOAT:
84                         return static_cast<deUint32>(sizeof(deInt16));
85                 case VK_FORMAT_R16G16_SINT:
86                 case VK_FORMAT_R16G16_UINT:
87                 case VK_FORMAT_R16G16_SFLOAT:
88                         return static_cast<deUint32>(sizeof(deInt16) * 2);
89                 case VK_FORMAT_R16G16B16_UINT:
90                 case VK_FORMAT_R16G16B16_SINT:
91                 case VK_FORMAT_R16G16B16_SFLOAT:
92                 case VK_FORMAT_R16G16B16A16_SINT:
93                 case VK_FORMAT_R16G16B16A16_UINT:
94                 case VK_FORMAT_R16G16B16A16_SFLOAT:
95                         return static_cast<deUint32>(sizeof(deInt16) * 4);
96                 case VK_FORMAT_R32_SINT:
97                 case VK_FORMAT_R32_UINT:
98                 case VK_FORMAT_R32_SFLOAT:
99                         return static_cast<deUint32>(sizeof(deInt32));
100                 case VK_FORMAT_R32G32_SINT:
101                 case VK_FORMAT_R32G32_UINT:
102                 case VK_FORMAT_R32G32_SFLOAT:
103                         return static_cast<deUint32>(sizeof(deInt32) * 2);
104                 case VK_FORMAT_R32G32B32_SINT:
105                 case VK_FORMAT_R32G32B32_UINT:
106                 case VK_FORMAT_R32G32B32_SFLOAT:
107                 case VK_FORMAT_R32G32B32A32_SINT:
108                 case VK_FORMAT_R32G32B32A32_UINT:
109                 case VK_FORMAT_R32G32B32A32_SFLOAT:
110                         return static_cast<deUint32>(sizeof(deInt32) * 4);
111                 case VK_FORMAT_R64_SINT:
112                 case VK_FORMAT_R64_UINT:
113                 case VK_FORMAT_R64_SFLOAT:
114                         return static_cast<deUint32>(sizeof(deInt64));
115                 case VK_FORMAT_R64G64_SINT:
116                 case VK_FORMAT_R64G64_UINT:
117                 case VK_FORMAT_R64G64_SFLOAT:
118                         return static_cast<deUint32>(sizeof(deInt64) * 2);
119                 case VK_FORMAT_R64G64B64_SINT:
120                 case VK_FORMAT_R64G64B64_UINT:
121                 case VK_FORMAT_R64G64B64_SFLOAT:
122                 case VK_FORMAT_R64G64B64A64_SINT:
123                 case VK_FORMAT_R64G64B64A64_UINT:
124                 case VK_FORMAT_R64G64B64A64_SFLOAT:
125                         return static_cast<deUint32>(sizeof(deInt64) * 4);
126                 // The below formats are used to represent bool and bvec* types. These
127                 // types are passed to the shader as int and ivec* types, before the
128                 // calculations are done as booleans. We need a distinct type here so
129                 // that the shader generators can switch on it and generate the correct
130                 // shader source for testing.
131                 case VK_FORMAT_R8_USCALED:
132                         return static_cast<deUint32>(sizeof(deInt32));
133                 case VK_FORMAT_R8G8_USCALED:
134                         return static_cast<deUint32>(sizeof(deInt32) * 2);
135                 case VK_FORMAT_R8G8B8_USCALED:
136                 case VK_FORMAT_R8G8B8A8_USCALED:
137                         return static_cast<deUint32>(sizeof(deInt32) * 4);
138         }
139 }
140
141 deUint32 getElementSizeInBytes(
142         const VkFormat format,
143         const subgroups::SSBOData::InputDataLayoutType layout)
144 {
145         deUint32 bytes = getFormatSizeInBytes(format);
146         if (layout == subgroups::SSBOData::LayoutStd140)
147                 return bytes < 16 ? 16 : bytes;
148         else
149                 return bytes;
150 }
151
152 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
153 {
154         VkAttachmentReference colorReference = {
155                 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
156         };
157
158         const VkSubpassDescription subpassDescription = {0u,
159                                                                                                          VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
160                                                                                                          DE_NULL, DE_NULL, 0, DE_NULL
161                                                                                                         };
162
163         const VkSubpassDependency subpassDependencies[2] = {
164                 {   VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
165                         VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
166                         VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
167                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
168                         VK_DEPENDENCY_BY_REGION_BIT
169                 },
170                 {   0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
171                         VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
172                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
173                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
174                         VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
175                 },
176         };
177
178         VkAttachmentDescription attachmentDescription = {0u, format,
179                                                                                                          VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
180                                                                                                          VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
181                                                                                                          VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
182                                                                                                          VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
183                                                                                                         };
184
185         const VkRenderPassCreateInfo renderPassCreateInfo = {
186                 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
187                 &attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
188         };
189
190         return createRenderPass(context.getDeviceInterface(), context.getDevice(),
191                                                         &renderPassCreateInfo);
192 }
193
194 Move<VkPipeline> makeGraphicsPipeline(Context&                                                                  context,
195                                                                           const VkPipelineLayout                                        pipelineLayout,
196                                                                           const VkShaderStageFlags                                      stages,
197                                                                           const VkShaderModule                                          vertexShaderModule,
198                                                                           const VkShaderModule                                          fragmentShaderModule,
199                                                                           const VkShaderModule                                          geometryShaderModule,
200                                                                           const VkShaderModule                                          tessellationControlModule,
201                                                                           const VkShaderModule                                          tessellationEvaluationModule,
202                                                                           const VkRenderPass                                            renderPass,
203                                                                           const VkPrimitiveTopology                                     topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
204                                                                           const VkVertexInputBindingDescription*        vertexInputBindingDescription = DE_NULL,
205                                                                           const VkVertexInputAttributeDescription*      vertexInputAttributeDescriptions = DE_NULL,
206                                                                           const bool                                                            frameBufferTests = false,
207                                                                           const vk::VkFormat                                            attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT)
208 {
209         std::vector<VkViewport> noViewports;
210         std::vector<VkRect2D>   noScissors;
211
212         const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
213         {
214                 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,      // VkStructureType                                                              sType;
215                 DE_NULL,                                                                                                        // const void*                                                                  pNext;
216                 0u,                                                                                                                     // VkPipelineVertexInputStateCreateFlags                flags;
217                 vertexInputBindingDescription == DE_NULL ? 0u : 1u,                     // deUint32                                                                             vertexBindingDescriptionCount;
218                 vertexInputBindingDescription,                                                          // const VkVertexInputBindingDescription*               pVertexBindingDescriptions;
219                 vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,          // deUint32                                                                             vertexAttributeDescriptionCount;
220                 vertexInputAttributeDescriptions,                                                       // const VkVertexInputAttributeDescription*             pVertexAttributeDescriptions;
221         };
222
223         const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
224         const VkColorComponentFlags colorComponent =
225                                                                                                 numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
226                                                                                                 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
227                                                                                                 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
228                                                                                                 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
229
230         const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
231         {
232                 VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
233                 VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
234                 colorComponent
235         };
236
237         const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
238         {
239                 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
240                 VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
241                 { 0.0f, 0.0f, 0.0f, 0.0f }
242         };
243
244         const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
245
246         return vk::makeGraphicsPipeline(context.getDeviceInterface(),   // const DeviceInterface&                        vk
247                                                                         context.getDevice(),                    // const VkDevice                                device
248                                                                         pipelineLayout,                                 // const VkPipelineLayout                        pipelineLayout
249                                                                         vertexShaderModule,                             // const VkShaderModule                          vertexShaderModule
250                                                                         tessellationControlModule,              // const VkShaderModule                          tessellationControlShaderModule
251                                                                         tessellationEvaluationModule,   // const VkShaderModule                          tessellationEvalShaderModule
252                                                                         geometryShaderModule,                   // const VkShaderModule                          geometryShaderModule
253                                                                         fragmentShaderModule,                   // const VkShaderModule                          fragmentShaderModule
254                                                                         renderPass,                                             // const VkRenderPass                            renderPass
255                                                                         noViewports,                                    // const std::vector<VkViewport>&                viewports
256                                                                         noScissors,                                             // const std::vector<VkRect2D>&                  scissors
257                                                                         topology,                                               // const VkPrimitiveTopology                     topology
258                                                                         0u,                                                             // const deUint32                                subpass
259                                                                         patchControlPoints,                             // const deUint32                                patchControlPoints
260                                                                         &vertexInputStateCreateInfo,    // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
261                                                                         DE_NULL,                                                // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
262                                                                         DE_NULL,                                                // const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
263                                                                         DE_NULL,                                                // const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
264                                                                         &colorBlendStateCreateInfo);    // const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
265 }
266
267 Move<VkPipeline> makeComputePipeline(Context& context,
268                                                                          const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
269                                                                          const deUint32 pipelineCreateFlags, VkPipeline basePipelineHandle,
270                                                                          deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
271 {
272         const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
273
274         const vk::VkSpecializationMapEntry entries[3] =
275         {
276                 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
277                 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
278                 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
279         };
280
281         const vk::VkSpecializationInfo info =
282         {
283                 /* mapEntryCount = */ 3,
284                 /* pMapEntries   = */ entries,
285                 /* dataSize      = */ sizeof(localSize),
286                 /* pData         = */ localSize
287         };
288
289         const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
290         {
291                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,                            // VkStructureType                                      sType;
292                 DE_NULL,                                                                                                                        // const void*                                          pNext;
293                 0u,                                                                                                                                     // VkPipelineShaderStageCreateFlags     flags;
294                 VK_SHADER_STAGE_COMPUTE_BIT,                                                                            // VkShaderStageFlagBits                        stage;
295                 shaderModule,                                                                                                           // VkShaderModule                                       module;
296                 "main",                                                                                                                         // const char*                                          pName;
297                 &info,                                                                                                                          // const VkSpecializationInfo*          pSpecializationInfo;
298         };
299
300         const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
301         {
302                 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType      sType;
303                 DE_NULL,                                                                                // const void*                                          pNext;
304                 pipelineCreateFlags,                                                    // VkPipelineCreateFlags                        flags;
305                 pipelineShaderStageParams,                                              // VkPipelineShaderStageCreateInfo      stage;
306                 pipelineLayout,                                                                 // VkPipelineLayout                                     layout;
307                 basePipelineHandle,                                                             // VkPipeline                                           basePipelineHandle;
308                 -1,                                                                                             // deInt32                                                      basePipelineIndex;
309         };
310
311         return createComputePipeline(context.getDeviceInterface(),
312                                                                  context.getDevice(), DE_NULL, &pipelineCreateInfo);
313 }
314
315 Move<VkCommandBuffer> makeCommandBuffer(
316         Context& context, const VkCommandPool commandPool)
317 {
318         const VkCommandBufferAllocateInfo bufferAllocateParams =
319         {
320                 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType              sType;
321                 DE_NULL,                                                                                // const void*                  pNext;
322                 commandPool,                                                                    // VkCommandPool                commandPool;
323                 VK_COMMAND_BUFFER_LEVEL_PRIMARY,                                // VkCommandBufferLevel level;
324                 1u,                                                                                             // deUint32                             bufferCount;
325         };
326         return allocateCommandBuffer(context.getDeviceInterface(),
327                                                                  context.getDevice(), &bufferAllocateParams);
328 }
329
330 struct Buffer;
331 struct Image;
332
333 struct BufferOrImage
334 {
335         bool isImage() const
336         {
337                 return m_isImage;
338         }
339
340         Buffer* getAsBuffer()
341         {
342                 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
343                 return reinterpret_cast<Buffer* >(this);
344         }
345
346         Image* getAsImage()
347         {
348                 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
349                 return reinterpret_cast<Image*>(this);
350         }
351
352         virtual VkDescriptorType getType() const
353         {
354                 if (m_isImage)
355                 {
356                         return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
357                 }
358                 else
359                 {
360                         return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
361                 }
362         }
363
364         Allocation& getAllocation() const
365         {
366                 return *m_allocation;
367         }
368
369         virtual ~BufferOrImage() {}
370
371 protected:
372         explicit BufferOrImage(bool image) : m_isImage(image) {}
373
374         bool m_isImage;
375         de::details::MovePtr<Allocation> m_allocation;
376 };
377
378 struct Buffer : public BufferOrImage
379 {
380         explicit Buffer(
381                 Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
382                 : BufferOrImage         (false)
383                 , m_sizeInBytes         (sizeInBytes)
384                 , m_usage                       (usage)
385         {
386                 const DeviceInterface&                  vkd                                     = context.getDeviceInterface();
387                 const VkDevice                                  device                          = context.getDevice();
388
389                 const vk::VkBufferCreateInfo    bufferCreateInfo        =
390                 {
391                         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
392                         DE_NULL,
393                         0u,
394                         m_sizeInBytes,
395                         m_usage,
396                         VK_SHARING_MODE_EXCLUSIVE,
397                         0u,
398                         DE_NULL,
399                 };
400                 m_buffer                = createBuffer(vkd, device, &bufferCreateInfo);
401
402                 VkMemoryRequirements                    req                                     = getBufferMemoryRequirements(vkd, device, *m_buffer);
403
404                 m_allocation    = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
405                 VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
406         }
407
408         virtual VkDescriptorType getType() const
409         {
410                 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
411                 {
412                         return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
413                 }
414                 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
415         }
416
417         VkBuffer getBuffer () const
418         {
419                 return *m_buffer;
420         }
421
422         const VkBuffer* getBufferPtr () const
423         {
424                 return &(*m_buffer);
425         }
426
427         VkDeviceSize getSize () const
428         {
429                 return m_sizeInBytes;
430         }
431
432 private:
433         Move<VkBuffer>                          m_buffer;
434         VkDeviceSize                            m_sizeInBytes;
435         const VkBufferUsageFlags        m_usage;
436 };
437
438 struct Image : public BufferOrImage
439 {
440         explicit Image(Context& context, deUint32 width, deUint32 height,
441                                    VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
442                 : BufferOrImage(true)
443         {
444                 const DeviceInterface&                  vk                                      = context.getDeviceInterface();
445                 const VkDevice                                  device                          = context.getDevice();
446                 const deUint32                                  queueFamilyIndex        = context.getUniversalQueueFamilyIndex();
447
448                 const VkImageCreateInfo                 imageCreateInfo         =
449                 {
450                         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
451                         format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
452                         VK_IMAGE_TILING_OPTIMAL, usage,
453                         VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
454                         VK_IMAGE_LAYOUT_UNDEFINED
455                 };
456
457                 const VkComponentMapping                componentMapping        =
458                 {
459                         VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
460                         VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
461                 };
462
463                 const VkImageSubresourceRange   subresourceRange        =
464                 {
465                         VK_IMAGE_ASPECT_COLOR_BIT,      //VkImageAspectFlags    aspectMask
466                         0u,                                                     //deUint32                              baseMipLevel
467                         1u,                                                     //deUint32                              levelCount
468                         0u,                                                     //deUint32                              baseArrayLayer
469                         1u                                                      //deUint32                              layerCount
470                 };
471
472                 const VkSamplerCreateInfo               samplerCreateInfo       =
473                 {
474                         VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
475                         DE_NULL,
476                         0u,
477                         VK_FILTER_NEAREST,
478                         VK_FILTER_NEAREST,
479                         VK_SAMPLER_MIPMAP_MODE_NEAREST,
480                         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
481                         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
482                         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
483                         0.0f,
484                         VK_FALSE,
485                         1.0f,
486                         DE_FALSE,
487                         VK_COMPARE_OP_ALWAYS,
488                         0.0f,
489                         0.0f,
490                         VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
491                         VK_FALSE,
492                 };
493
494                 m_image                 = createImage(vk, device, &imageCreateInfo);
495
496                 VkMemoryRequirements                    req                                     = getImageMemoryRequirements(vk, device, *m_image);
497
498                 req.size                *= 2;
499                 m_allocation    = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
500
501                 VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
502
503                 const VkImageViewCreateInfo             imageViewCreateInfo     =
504                 {
505                         VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
506                         VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
507                         subresourceRange
508                 };
509
510                 m_imageView             = createImageView(vk, device, &imageViewCreateInfo);
511                 m_sampler               = createSampler(vk, device, &samplerCreateInfo);
512
513                 // Transition input image layouts
514                 {
515                         const Unique<VkCommandPool>             cmdPool                 (makeCommandPool(vk, device, queueFamilyIndex));
516                         const Unique<VkCommandBuffer>   cmdBuffer               (makeCommandBuffer(context, *cmdPool));
517
518                         beginCommandBuffer(vk, *cmdBuffer);
519
520                         const VkImageMemoryBarrier              imageBarrier    = makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
521                                                                                                                                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
522
523                         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
524                                 (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);
525
526                         endCommandBuffer(vk, *cmdBuffer);
527                         submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
528                 }
529         }
530
531         VkImage getImage () const
532         {
533                 return *m_image;
534         }
535
536         VkImageView getImageView () const
537         {
538                 return *m_imageView;
539         }
540
541         VkSampler getSampler () const
542         {
543                 return *m_sampler;
544         }
545
546 private:
547         Move<VkImage> m_image;
548         Move<VkImageView> m_imageView;
549         Move<VkSampler> m_sampler;
550 };
551 }
552
553 std::string vkt::subgroups::getSharedMemoryBallotHelper()
554 {
555         return  "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
556                         "uvec4 sharedMemoryBallot(bool vote)\n"
557                         "{\n"
558                         "  uint groupOffset = gl_SubgroupID;\n"
559                         "  // One invocation in the group 0's the whole group's data\n"
560                         "  if (subgroupElect())\n"
561                         "  {\n"
562                         "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
563                         "  }\n"
564                         "  subgroupMemoryBarrierShared();\n"
565                         "  if (vote)\n"
566                         "  {\n"
567                         "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
568                         "    const highp uint bitToSet = 1u << invocationId;\n"
569                         "    switch (gl_SubgroupInvocationID / 32)\n"
570                         "    {\n"
571                         "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
572                         "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
573                         "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
574                         "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
575                         "    }\n"
576                         "  }\n"
577                         "  subgroupMemoryBarrierShared();\n"
578                         "  return superSecretComputeShaderHelper[groupOffset];\n"
579                         "}\n";
580 }
581
582 std::string vkt::subgroups::getSharedMemoryBallotHelperARB()
583 {
584         return  "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
585                         "uint64_t sharedMemoryBallot(bool vote)\n"
586                         "{\n"
587                         "  uint groupOffset = gl_SubgroupID;\n"
588                         "  // One invocation in the group 0's the whole group's data\n"
589                         "  if (subgroupElect())\n"
590                         "  {\n"
591                         "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
592                         "  }\n"
593                         "  subgroupMemoryBarrierShared();\n"
594                         "  if (vote)\n"
595                         "  {\n"
596                         "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
597                         "    const highp uint bitToSet = 1u << invocationId;\n"
598                         "    switch (gl_SubgroupInvocationID / 32)\n"
599                         "    {\n"
600                         "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
601                         "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
602                         "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
603                         "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
604                         "    }\n"
605                         "  }\n"
606                         "  subgroupMemoryBarrierShared();\n"
607                         "  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
608                         "}\n";
609 }
610
611 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
612 {
613         VkPhysicalDeviceSubgroupProperties subgroupProperties;
614         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
615         subgroupProperties.pNext = DE_NULL;
616
617         VkPhysicalDeviceProperties2 properties;
618         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
619         properties.pNext = &subgroupProperties;
620
621         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
622
623         return subgroupProperties.subgroupSize;
624 }
625
626 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
627         return 128u;
628 }
629
630 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
631 {
632         switch (stage)
633         {
634                 default:
635                         DE_FATAL("Unhandled stage!");
636                         return "";
637                 case VK_SHADER_STAGE_COMPUTE_BIT:
638                         return "compute";
639                 case VK_SHADER_STAGE_FRAGMENT_BIT:
640                         return "fragment";
641                 case VK_SHADER_STAGE_VERTEX_BIT:
642                         return "vertex";
643                 case VK_SHADER_STAGE_GEOMETRY_BIT:
644                         return "geometry";
645                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
646                         return "tess_control";
647                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
648                         return "tess_eval";
649         }
650 }
651
652 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
653 {
654         switch (bit)
655         {
656                 default:
657                         DE_FATAL("Unknown subgroup feature category!");
658                         return "";
659                 case VK_SUBGROUP_FEATURE_BASIC_BIT:
660                         return "VK_SUBGROUP_FEATURE_BASIC_BIT";
661                 case VK_SUBGROUP_FEATURE_VOTE_BIT:
662                         return "VK_SUBGROUP_FEATURE_VOTE_BIT";
663                 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
664                         return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
665                 case VK_SUBGROUP_FEATURE_BALLOT_BIT:
666                         return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
667                 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
668                         return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
669                 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
670                         return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
671                 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
672                         return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
673                 case VK_SUBGROUP_FEATURE_QUAD_BIT:
674                         return "VK_SUBGROUP_FEATURE_QUAD_BIT";
675         }
676 }
677
678 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
679 {
680         {
681         /*
682                 "#version 450\n"
683                 "void main (void)\n"
684                 "{\n"
685                 "  float pixelSize = 2.0f/1024.0f;\n"
686                 "   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
687                 "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
688                 "  gl_PointSize = 1.0f;\n"
689                 "}\n"
690         */
691                 const std::string vertNoSubgroup =
692                         "; SPIR-V\n"
693                         "; Version: 1.3\n"
694                         "; Generator: Khronos Glslang Reference Front End; 1\n"
695                         "; Bound: 37\n"
696                         "; Schema: 0\n"
697                         "OpCapability Shader\n"
698                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
699                         "OpMemoryModel Logical GLSL450\n"
700                         "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
701                         "OpMemberDecorate %20 0 BuiltIn Position\n"
702                         "OpMemberDecorate %20 1 BuiltIn PointSize\n"
703                         "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
704                         "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
705                         "OpDecorate %20 Block\n"
706                         "OpDecorate %26 BuiltIn VertexIndex\n"
707                         "%2 = OpTypeVoid\n"
708                         "%3 = OpTypeFunction %2\n"
709                         "%6 = OpTypeFloat 32\n"
710                         "%7 = OpTypePointer Function %6\n"
711                         "%9 = OpConstant %6 0.00195313\n"
712                         "%12 = OpConstant %6 2\n"
713                         "%14 = OpConstant %6 1\n"
714                         "%16 = OpTypeVector %6 4\n"
715                         "%17 = OpTypeInt 32 0\n"
716                         "%18 = OpConstant %17 1\n"
717                         "%19 = OpTypeArray %6 %18\n"
718                         "%20 = OpTypeStruct %16 %6 %19 %19\n"
719                         "%21 = OpTypePointer Output %20\n"
720                         "%22 = OpVariable %21 Output\n"
721                         "%23 = OpTypeInt 32 1\n"
722                         "%24 = OpConstant %23 0\n"
723                         "%25 = OpTypePointer Input %23\n"
724                         "%26 = OpVariable %25 Input\n"
725                         "%33 = OpConstant %6 0\n"
726                         "%35 = OpTypePointer Output %16\n"
727                         "%37 = OpConstant %23 1\n"
728                         "%38 = OpTypePointer Output %6\n"
729                         "%4 = OpFunction %2 None %3\n"
730                         "%5 = OpLabel\n"
731                         "%8 = OpVariable %7 Function\n"
732                         "%10 = OpVariable %7 Function\n"
733                         "OpStore %8 %9\n"
734                         "%11 = OpLoad %6 %8\n"
735                         "%13 = OpFDiv %6 %11 %12\n"
736                         "%15 = OpFSub %6 %13 %14\n"
737                         "OpStore %10 %15\n"
738                         "%27 = OpLoad %23 %26\n"
739                         "%28 = OpConvertSToF %6 %27\n"
740                         "%29 = OpLoad %6 %8\n"
741                         "%30 = OpFMul %6 %28 %29\n"
742                         "%31 = OpLoad %6 %10\n"
743                         "%32 = OpFAdd %6 %30 %31\n"
744                         "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
745                         "%36 = OpAccessChain %35 %22 %24\n"
746                         "OpStore %36 %34\n"
747                         "%39 = OpAccessChain %38 %22 %37\n"
748                         "OpStore %39 %14\n"
749                         "OpReturn\n"
750                         "OpFunctionEnd\n";
751                 programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
752         }
753
754         {
755         /*
756                 "#version 450\n"
757                 "layout(vertices=1) out;\n"
758                 "\n"
759                 "void main (void)\n"
760                 "{\n"
761                 "  if (gl_InvocationID == 0)\n"
762                 "  {\n"
763                 "    gl_TessLevelOuter[0] = 1.0f;\n"
764                 "    gl_TessLevelOuter[1] = 1.0f;\n"
765                 "  }\n"
766                 "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
767                 "}\n"
768         */
769                 const std::string tescNoSubgroup =
770                         "; SPIR-V\n"
771                         "; Version: 1.3\n"
772                         "; Generator: Khronos Glslang Reference Front End; 1\n"
773                         "; Bound: 45\n"
774                         "; Schema: 0\n"
775                         "OpCapability Tessellation\n"
776                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
777                         "OpMemoryModel Logical GLSL450\n"
778                         "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
779                         "OpExecutionMode %4 OutputVertices 1\n"
780                         "OpDecorate %8 BuiltIn InvocationId\n"
781                         "OpDecorate %20 Patch\n"
782                         "OpDecorate %20 BuiltIn TessLevelOuter\n"
783                         "OpMemberDecorate %29 0 BuiltIn Position\n"
784                         "OpMemberDecorate %29 1 BuiltIn PointSize\n"
785                         "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
786                         "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
787                         "OpDecorate %29 Block\n"
788                         "OpMemberDecorate %34 0 BuiltIn Position\n"
789                         "OpMemberDecorate %34 1 BuiltIn PointSize\n"
790                         "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
791                         "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
792                         "OpDecorate %34 Block\n"
793                         "%2 = OpTypeVoid\n"
794                         "%3 = OpTypeFunction %2\n"
795                         "%6 = OpTypeInt 32 1\n"
796                         "%7 = OpTypePointer Input %6\n"
797                         "%8 = OpVariable %7 Input\n"
798                         "%10 = OpConstant %6 0\n"
799                         "%11 = OpTypeBool\n"
800                         "%15 = OpTypeFloat 32\n"
801                         "%16 = OpTypeInt 32 0\n"
802                         "%17 = OpConstant %16 4\n"
803                         "%18 = OpTypeArray %15 %17\n"
804                         "%19 = OpTypePointer Output %18\n"
805                         "%20 = OpVariable %19 Output\n"
806                         "%21 = OpConstant %15 1\n"
807                         "%22 = OpTypePointer Output %15\n"
808                         "%24 = OpConstant %6 1\n"
809                         "%26 = OpTypeVector %15 4\n"
810                         "%27 = OpConstant %16 1\n"
811                         "%28 = OpTypeArray %15 %27\n"
812                         "%29 = OpTypeStruct %26 %15 %28 %28\n"
813                         "%30 = OpTypeArray %29 %27\n"
814                         "%31 = OpTypePointer Output %30\n"
815                         "%32 = OpVariable %31 Output\n"
816                         "%34 = OpTypeStruct %26 %15 %28 %28\n"
817                         "%35 = OpConstant %16 32\n"
818                         "%36 = OpTypeArray %34 %35\n"
819                         "%37 = OpTypePointer Input %36\n"
820                         "%38 = OpVariable %37 Input\n"
821                         "%40 = OpTypePointer Input %26\n"
822                         "%43 = OpTypePointer Output %26\n"
823                         "%4 = OpFunction %2 None %3\n"
824                         "%5 = OpLabel\n"
825                         "%9 = OpLoad %6 %8\n"
826                         "%12 = OpIEqual %11 %9 %10\n"
827                         "OpSelectionMerge %14 None\n"
828                         "OpBranchConditional %12 %13 %14\n"
829                         "%13 = OpLabel\n"
830                         "%23 = OpAccessChain %22 %20 %10\n"
831                         "OpStore %23 %21\n"
832                         "%25 = OpAccessChain %22 %20 %24\n"
833                         "OpStore %25 %21\n"
834                         "OpBranch %14\n"
835                         "%14 = OpLabel\n"
836                         "%33 = OpLoad %6 %8\n"
837                         "%39 = OpLoad %6 %8\n"
838                         "%41 = OpAccessChain %40 %38 %39 %10\n"
839                         "%42 = OpLoad %26 %41\n"
840                         "%44 = OpAccessChain %43 %32 %33 %10\n"
841                         "OpStore %44 %42\n"
842                         "OpReturn\n"
843                         "OpFunctionEnd\n";
844                 programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
845         }
846
847         {
848         /*
849                 "#version 450\n"
850                 "layout(isolines) in;\n"
851                 "\n"
852                 "void main (void)\n"
853                 "{\n"
854                 "  float pixelSize = 2.0f/1024.0f;\n"
855                 "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
856                 "}\n";
857         */
858                 const std::string teseNoSubgroup =
859                         "; SPIR-V\n"
860                         "; Version: 1.3\n"
861                         "; Generator: Khronos Glslang Reference Front End; 2\n"
862                         "; Bound: 42\n"
863                         "; Schema: 0\n"
864                         "OpCapability Tessellation\n"
865                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
866                         "OpMemoryModel Logical GLSL450\n"
867                         "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
868                         "OpExecutionMode %4 Isolines\n"
869                         "OpExecutionMode %4 SpacingEqual\n"
870                         "OpExecutionMode %4 VertexOrderCcw\n"
871                         "OpMemberDecorate %14 0 BuiltIn Position\n"
872                         "OpMemberDecorate %14 1 BuiltIn PointSize\n"
873                         "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
874                         "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
875                         "OpDecorate %14 Block\n"
876                         "OpMemberDecorate %19 0 BuiltIn Position\n"
877                         "OpMemberDecorate %19 1 BuiltIn PointSize\n"
878                         "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
879                         "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
880                         "OpDecorate %19 Block\n"
881                         "OpDecorate %29 BuiltIn TessCoord\n"
882                         "%2 = OpTypeVoid\n"
883                         "%3 = OpTypeFunction %2\n"
884                         "%6 = OpTypeFloat 32\n"
885                         "%7 = OpTypePointer Function %6\n"
886                         "%9 = OpConstant %6 0.00195313\n"
887                         "%10 = OpTypeVector %6 4\n"
888                         "%11 = OpTypeInt 32 0\n"
889                         "%12 = OpConstant %11 1\n"
890                         "%13 = OpTypeArray %6 %12\n"
891                         "%14 = OpTypeStruct %10 %6 %13 %13\n"
892                         "%15 = OpTypePointer Output %14\n"
893                         "%16 = OpVariable %15 Output\n"
894                         "%17 = OpTypeInt 32 1\n"
895                         "%18 = OpConstant %17 0\n"
896                         "%19 = OpTypeStruct %10 %6 %13 %13\n"
897                         "%20 = OpConstant %11 32\n"
898                         "%21 = OpTypeArray %19 %20\n"
899                         "%22 = OpTypePointer Input %21\n"
900                         "%23 = OpVariable %22 Input\n"
901                         "%24 = OpTypePointer Input %10\n"
902                         "%27 = OpTypeVector %6 3\n"
903                         "%28 = OpTypePointer Input %27\n"
904                         "%29 = OpVariable %28 Input\n"
905                         "%30 = OpConstant %11 0\n"
906                         "%31 = OpTypePointer Input %6\n"
907                         "%36 = OpConstant %6 2\n"
908                         "%40 = OpTypePointer Output %10\n"
909                         "%4 = OpFunction %2 None %3\n"
910                         "%5 = OpLabel\n"
911                         "%8 = OpVariable %7 Function\n"
912                         "OpStore %8 %9\n"
913                         "%25 = OpAccessChain %24 %23 %18 %18\n"
914                         "%26 = OpLoad %10 %25\n"
915                         "%32 = OpAccessChain %31 %29 %30\n"
916                         "%33 = OpLoad %6 %32\n"
917                         "%34 = OpLoad %6 %8\n"
918                         "%35 = OpFMul %6 %33 %34\n"
919                         "%37 = OpFDiv %6 %35 %36\n"
920                         "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
921                         "%39 = OpFAdd %10 %26 %38\n"
922                         "%41 = OpAccessChain %40 %16 %18\n"
923                         "OpStore %41 %39\n"
924                         "OpReturn\n"
925                         "OpFunctionEnd\n";
926                 programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
927         }
928
929 }
930
931
932 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
933 {
934         switch (stage)
935         {
936                 default:
937                         DE_FATAL("Unhandled stage!");
938                         return "";
939                 case VK_SHADER_STAGE_FRAGMENT_BIT:
940                         return
941                                 "#version 450\n"
942                                 "void main (void)\n"
943                                 "{\n"
944                                 "  float pixelSize = 2.0f/1024.0f;\n"
945                                 "   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
946                                 "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
947                                 "}\n";
948                 case VK_SHADER_STAGE_GEOMETRY_BIT:
949                         return
950                                 "#version 450\n"
951                                 "void main (void)\n"
952                                 "{\n"
953                                 "}\n";
954                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
955                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
956                         return
957                                 "#version 450\n"
958                                 "void main (void)\n"
959                                 "{\n"
960                                 "}\n";
961         }
962 }
963
964 void vkt::subgroups::initStdFrameBufferPrograms(        SourceCollections&                              programCollection,
965                                                                                                         const vk::ShaderBuildOptions&   buildOptions,
966                                                                                                         VkShaderStageFlags                              shaderStage,
967                                                                                                         VkFormat                                                format,
968                                                                                                         bool                                                    gsPointSize,
969                                                                                                         std::string                                             extHeader,
970                                                                                                         std::string                                             testSrc,
971                                                                                                         std::string                                             helperStr)
972 {
973         subgroups::setFragmentShaderFrameBuffer(programCollection);
974
975         if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
976                 subgroups::setVertexShaderFrameBuffer(programCollection);
977
978         if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
979         {
980                 std::ostringstream vertex;
981                 vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
982                         << extHeader.c_str()
983                         << "layout(location = 0) in highp vec4 in_position;\n"
984                         << "layout(location = 0) out float result;\n"
985                         << "layout(set = 0, binding = 0) uniform Buffer1\n"
986                         << "{\n"
987                         << "  " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
988                         << "};\n"
989                         << "\n"
990                         << helperStr.c_str()
991                         << "void main (void)\n"
992                         << "{\n"
993                         << "  uint tempRes;\n"
994                         << testSrc
995                         << "  result = float(tempRes);\n"
996                         << "  gl_Position = in_position;\n"
997                         << "  gl_PointSize = 1.0f;\n"
998                         << "}\n";
999                 programCollection.glslSources.add("vert")
1000                         << glu::VertexSource(vertex.str()) << buildOptions;
1001         }
1002         else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1003         {
1004                 std::ostringstream geometry;
1005
1006                 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1007                         << extHeader.c_str()
1008                         << "layout(points) in;\n"
1009                         << "layout(points, max_vertices = 1) out;\n"
1010                         << "layout(location = 0) out float out_color;\n"
1011                         << "layout(set = 0, binding = 0) uniform Buffer1\n"
1012                         << "{\n"
1013                         << "  " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1014                         << "};\n"
1015                         << "\n"
1016                         << helperStr.c_str()
1017                         << "void main (void)\n"
1018                         << "{\n"
1019                         << "  uint tempRes;\n"
1020                         << testSrc
1021                         << "  out_color = float(tempRes);\n"
1022                         << "  gl_Position = gl_in[0].gl_Position;\n"
1023                         << (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1024                         << "  EmitVertex();\n"
1025                         << "  EndPrimitive();\n"
1026                         << "}\n";
1027
1028                 programCollection.glslSources.add("geometry")
1029                         << glu::GeometrySource(geometry.str()) << buildOptions;
1030         }
1031         else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1032         {
1033                 std::ostringstream controlSource;
1034                 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1035                         << extHeader.c_str()
1036                         << "layout(vertices = 2) out;\n"
1037                         << "layout(location = 0) out float out_color[];\n"
1038                         << "layout(set = 0, binding = 0) uniform Buffer1\n"
1039                         << "{\n"
1040                         << "  " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1041                         << "};\n"
1042                         << "\n"
1043                         << helperStr.c_str()
1044                         << "void main (void)\n"
1045                         << "{\n"
1046                         << "  if (gl_InvocationID == 0)\n"
1047                         << "  {\n"
1048                         << "    gl_TessLevelOuter[0] = 1.0f;\n"
1049                         << "    gl_TessLevelOuter[1] = 1.0f;\n"
1050                         << "  }\n"
1051                         << "  uint tempRes;\n"
1052                         << testSrc
1053                         << "  out_color[gl_InvocationID] = float(tempRes);\n"
1054                         << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1055                         << "}\n";
1056
1057                 programCollection.glslSources.add("tesc")
1058                         << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1059                 subgroups::setTesEvalShaderFrameBuffer(programCollection);
1060         }
1061         else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1062         {
1063                 ostringstream evaluationSource;
1064                 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1065                         << extHeader.c_str()
1066                         << "layout(isolines, equal_spacing, ccw ) in;\n"
1067                         << "layout(location = 0) out float out_color;\n"
1068                         << "layout(set = 0, binding = 0) uniform Buffer1\n"
1069                         << "{\n"
1070                         << "  " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1071                         << "};\n"
1072                         << "\n"
1073                         << helperStr.c_str()
1074                         << "void main (void)\n"
1075                         << "{\n"
1076                         << "  uint tempRes;\n"
1077                         << testSrc
1078                         << "  out_color = float(tempRes);\n"
1079                         << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1080                         << "}\n";
1081
1082                 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1083                 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1084         }
1085         else
1086         {
1087                 DE_FATAL("Unsupported shader stage");
1088         }
1089 }
1090
1091 void vkt::subgroups::initStdPrograms(   vk::SourceCollections&                  programCollection,
1092                                                                                 const vk::ShaderBuildOptions&   buildOptions,
1093                                                                                 vk::VkShaderStageFlags                  shaderStage,
1094                                                                                 vk::VkFormat                                    format,
1095                                                                                 std::string                                             extHeader,
1096                                                                                 std::string                                             testSrc,
1097                                                                                 std::string                                             helperStr)
1098 {
1099         if (shaderStage == VK_SHADER_STAGE_COMPUTE_BIT)
1100         {
1101                 std::ostringstream src;
1102
1103                 src << "#version 450\n"
1104                         << extHeader.c_str()
1105                         << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1106                         "local_size_z_id = 2) in;\n"
1107                         << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1108                         << "{\n"
1109                         << "  uint result[];\n"
1110                         << "};\n"
1111                         << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
1112                         << "{\n"
1113                         << "  " << subgroups::getFormatNameForGLSL(format) << " data[];\n"
1114                         << "};\n"
1115                         << "\n"
1116                         << helperStr.c_str()
1117                         << "void main (void)\n"
1118                         << "{\n"
1119                         << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1120                         << "  highp uint offset = globalSize.x * ((globalSize.y * "
1121                         "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1122                         "gl_GlobalInvocationID.x;\n"
1123                         << "  uint tempRes;\n"
1124                         << testSrc
1125                         << "  result[offset] = tempRes;\n"
1126                         << "}\n";
1127
1128                 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1129         }
1130         else
1131         {
1132                 const string vertex =
1133                         "#version 450\n"
1134                         + extHeader +
1135                         "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1136                         "{\n"
1137                         "  uint result[];\n"
1138                         "};\n"
1139                         "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1140                         "{\n"
1141                         "  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1142                         "};\n"
1143                         "\n"
1144                         + helperStr +
1145                         "void main (void)\n"
1146                         "{\n"
1147                         "  uint tempRes;\n"
1148                         + testSrc +
1149                         "  result[gl_VertexIndex] = tempRes;\n"
1150                         "  float pixelSize = 2.0f/1024.0f;\n"
1151                         "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1152                         "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1153                         "  gl_PointSize = 1.0f;\n"
1154                         "}\n";
1155
1156                 const string tesc =
1157                         "#version 450\n"
1158                         + extHeader +
1159                         "layout(vertices=1) out;\n"
1160                         "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
1161                         "{\n"
1162                         "  uint result[];\n"
1163                         "};\n"
1164                         "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1165                         "{\n"
1166                         "  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1167                         "};\n"
1168                         "\n"
1169                         + helperStr +
1170                         "void main (void)\n"
1171                         "{\n"
1172                         "  uint tempRes;\n"
1173                         + testSrc +
1174                         "  result[gl_PrimitiveID] = tempRes;\n"
1175                         "  if (gl_InvocationID == 0)\n"
1176                         "  {\n"
1177                         "    gl_TessLevelOuter[0] = 1.0f;\n"
1178                         "    gl_TessLevelOuter[1] = 1.0f;\n"
1179                         "  }\n"
1180                         "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1181                         "}\n";
1182
1183                 const string tese =
1184                         "#version 450\n"
1185                         + extHeader +
1186                         "layout(isolines) in;\n"
1187                         "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
1188                         "{\n"
1189                         "  uint result[];\n"
1190                         "};\n"
1191                         "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1192                         "{\n"
1193                         "  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1194                         "};\n"
1195                         "\n"
1196                         + helperStr +
1197                         "void main (void)\n"
1198                         "{\n"
1199                         "  uint tempRes;\n"
1200                         + testSrc +
1201                         "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1202                         "  float pixelSize = 2.0f/1024.0f;\n"
1203                         "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1204                         "}\n";
1205
1206                 const string geometry =
1207                         "#version 450\n"
1208                         + extHeader +
1209                         "layout(${TOPOLOGY}) in;\n"
1210                         "layout(points, max_vertices = 1) out;\n"
1211                         "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
1212                         "{\n"
1213                         "  uint result[];\n"
1214                         "};\n"
1215                         "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1216                         "{\n"
1217                         "  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1218                         "};\n"
1219                         "\n"
1220                         + helperStr +
1221                         "void main (void)\n"
1222                         "{\n"
1223                         "  uint tempRes;\n"
1224                         + testSrc +
1225                         "  result[gl_PrimitiveIDIn] = tempRes;\n"
1226                         "  gl_Position = gl_in[0].gl_Position;\n"
1227                         "  EmitVertex();\n"
1228                         "  EndPrimitive();\n"
1229                         "}\n";
1230
1231                 const string fragment =
1232                         "#version 450\n"
1233                         + extHeader +
1234                         "layout(location = 0) out uint result;\n"
1235                         "layout(set = 0, binding = 4, std430) readonly buffer Buffer1\n"
1236                         "{\n"
1237                         "  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1238                         "};\n"
1239                         + helperStr +
1240                         "void main (void)\n"
1241                         "{\n"
1242                         "  uint tempRes;\n"
1243                         + testSrc +
1244                         "  result = tempRes;\n"
1245                         "}\n";
1246
1247                 subgroups::addNoSubgroupShader(programCollection);
1248
1249                 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1250                 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1251                 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1252                 subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1253                 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
1254         }
1255 }
1256
1257 bool vkt::subgroups::isSubgroupSupported(Context& context)
1258 {
1259         return context.contextSupports(vk::ApiVersion(1, 1, 0));
1260 }
1261
1262 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
1263         Context& context, const VkShaderStageFlags stage)
1264 {
1265         VkPhysicalDeviceSubgroupProperties subgroupProperties;
1266         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1267         subgroupProperties.pNext = DE_NULL;
1268
1269         VkPhysicalDeviceProperties2 properties;
1270         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1271         properties.pNext = &subgroupProperties;
1272
1273         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1274
1275         return (stage & subgroupProperties.supportedStages) ? true : false;
1276 }
1277
1278 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
1279         VkShaderStageFlags stage)
1280 {
1281         switch (stage)
1282         {
1283                 default:
1284                         return false;
1285                 case VK_SHADER_STAGE_COMPUTE_BIT:
1286                         return true;
1287         }
1288 }
1289
1290 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
1291         Context& context,
1292         VkSubgroupFeatureFlagBits bit) {
1293         VkPhysicalDeviceSubgroupProperties subgroupProperties;
1294         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1295         subgroupProperties.pNext = DE_NULL;
1296
1297         VkPhysicalDeviceProperties2 properties;
1298         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1299         properties.pNext = &subgroupProperties;
1300
1301         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1302
1303         return (bit & subgroupProperties.supportedOperations) ? true : false;
1304 }
1305
1306 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
1307 {
1308         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1309                                 context.getInstanceInterface(), context.getPhysicalDevice());
1310         return features.fragmentStoresAndAtomics ? true : false;
1311 }
1312
1313 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
1314 {
1315         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1316                                 context.getInstanceInterface(), context.getPhysicalDevice());
1317         return features.vertexPipelineStoresAndAtomics ? true : false;
1318 }
1319
1320 bool vkt::subgroups::isInt64SupportedForDevice(Context& context)
1321 {
1322         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1323                                 context.getInstanceInterface(), context.getPhysicalDevice());
1324         return features.shaderInt64 ? true : false;
1325 }
1326
1327 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1328 {
1329         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1330                 context.getInstanceInterface(), context.getPhysicalDevice());
1331         return features.shaderTessellationAndGeometryPointSize ? true : false;
1332 }
1333
1334 bool vkt::subgroups::isFormatSupportedForDevice(Context& context, vk::VkFormat format)
1335 {
1336         VkPhysicalDeviceShaderSubgroupExtendedTypesFeaturesKHR subgroupExtendedTypesFeatures;
1337         deMemset(&subgroupExtendedTypesFeatures, 0, sizeof(subgroupExtendedTypesFeatures));
1338         subgroupExtendedTypesFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES_KHR;
1339         subgroupExtendedTypesFeatures.pNext = DE_NULL;
1340
1341         VkPhysicalDeviceShaderFloat16Int8FeaturesKHR float16Int8Features;
1342         deMemset(&float16Int8Features, 0, sizeof(float16Int8Features));
1343         float16Int8Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR;
1344         float16Int8Features.pNext = DE_NULL;
1345
1346         VkPhysicalDeviceFeatures2 features2;
1347         deMemset(&features2, 0, sizeof(features2));
1348         features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1349         features2.pNext = DE_NULL;
1350
1351         VkPhysicalDevice16BitStorageFeatures storage16bit;
1352         deMemset(&storage16bit, 0, sizeof(storage16bit));
1353         storage16bit.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
1354         storage16bit.pNext = DE_NULL;
1355         bool is16bitStorageSupported = context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage");
1356
1357         if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1358                 context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1359         {
1360                 features2.pNext = &subgroupExtendedTypesFeatures;
1361                 subgroupExtendedTypesFeatures.pNext = &float16Int8Features;
1362                 if ( is16bitStorageSupported )
1363                 {
1364                         float16Int8Features.pNext = &storage16bit;
1365                 }
1366
1367         }
1368
1369         const PlatformInterface&                platformInterface               = context.getPlatformInterface();
1370         const VkInstance                                instance                                = context.getInstance();
1371         const InstanceDriver                    instanceDriver                  (platformInterface, instance);
1372
1373         instanceDriver.getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
1374
1375         switch (format)
1376         {
1377                 default:
1378                         return true;
1379                 case VK_FORMAT_R16_SFLOAT:
1380                 case VK_FORMAT_R16G16_SFLOAT:
1381                 case VK_FORMAT_R16G16B16_SFLOAT:
1382                 case VK_FORMAT_R16G16B16A16_SFLOAT:
1383                         return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderFloat16 & storage16bit.storageBuffer16BitAccess ? true : false;
1384                 case VK_FORMAT_R64_SFLOAT:
1385                 case VK_FORMAT_R64G64_SFLOAT:
1386                 case VK_FORMAT_R64G64B64_SFLOAT:
1387                 case VK_FORMAT_R64G64B64A64_SFLOAT:
1388                         return features2.features.shaderFloat64 ? true : false;
1389                 case VK_FORMAT_R8_SINT:
1390                 case VK_FORMAT_R8G8_SINT:
1391                 case VK_FORMAT_R8G8B8_SINT:
1392                 case VK_FORMAT_R8G8B8A8_SINT:
1393                 case VK_FORMAT_R8_UINT:
1394                 case VK_FORMAT_R8G8_UINT:
1395                 case VK_FORMAT_R8G8B8_UINT:
1396                 case VK_FORMAT_R8G8B8A8_UINT:
1397                         return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderInt8 ? true : false;
1398                 case VK_FORMAT_R16_SINT:
1399                 case VK_FORMAT_R16G16_SINT:
1400                 case VK_FORMAT_R16G16B16_SINT:
1401                 case VK_FORMAT_R16G16B16A16_SINT:
1402                 case VK_FORMAT_R16_UINT:
1403                 case VK_FORMAT_R16G16_UINT:
1404                 case VK_FORMAT_R16G16B16_UINT:
1405                 case VK_FORMAT_R16G16B16A16_UINT:
1406                         return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt16 & storage16bit.storageBuffer16BitAccess ? true : false;
1407                 case VK_FORMAT_R64_SINT:
1408                 case VK_FORMAT_R64G64_SINT:
1409                 case VK_FORMAT_R64G64B64_SINT:
1410                 case VK_FORMAT_R64G64B64A64_SINT:
1411                 case VK_FORMAT_R64_UINT:
1412                 case VK_FORMAT_R64G64_UINT:
1413                 case VK_FORMAT_R64G64B64_UINT:
1414                 case VK_FORMAT_R64G64B64A64_UINT:
1415                         return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt64 ? true : false;
1416         }
1417 }
1418
1419 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1420 {
1421         switch (format)
1422         {
1423                 default:
1424                         DE_FATAL("Unhandled format!");
1425                         return "";
1426                 case VK_FORMAT_R8_SINT:
1427                         return "int8_t";
1428                 case VK_FORMAT_R8G8_SINT:
1429                         return "i8vec2";
1430                 case VK_FORMAT_R8G8B8_SINT:
1431                         return "i8vec3";
1432                 case VK_FORMAT_R8G8B8A8_SINT:
1433                         return "i8vec4";
1434                 case VK_FORMAT_R8_UINT:
1435                         return "uint8_t";
1436                 case VK_FORMAT_R8G8_UINT:
1437                         return "u8vec2";
1438                 case VK_FORMAT_R8G8B8_UINT:
1439                         return "u8vec3";
1440                 case VK_FORMAT_R8G8B8A8_UINT:
1441                         return "u8vec4";
1442                 case VK_FORMAT_R16_SINT:
1443                         return "int16_t";
1444                 case VK_FORMAT_R16G16_SINT:
1445                         return "i16vec2";
1446                 case VK_FORMAT_R16G16B16_SINT:
1447                         return "i16vec3";
1448                 case VK_FORMAT_R16G16B16A16_SINT:
1449                         return "i16vec4";
1450                 case VK_FORMAT_R16_UINT:
1451                         return "uint16_t";
1452                 case VK_FORMAT_R16G16_UINT:
1453                         return "u16vec2";
1454                 case VK_FORMAT_R16G16B16_UINT:
1455                         return "u16vec3";
1456                 case VK_FORMAT_R16G16B16A16_UINT:
1457                         return "u16vec4";
1458                 case VK_FORMAT_R32_SINT:
1459                         return "int";
1460                 case VK_FORMAT_R32G32_SINT:
1461                         return "ivec2";
1462                 case VK_FORMAT_R32G32B32_SINT:
1463                         return "ivec3";
1464                 case VK_FORMAT_R32G32B32A32_SINT:
1465                         return "ivec4";
1466                 case VK_FORMAT_R32_UINT:
1467                         return "uint";
1468                 case VK_FORMAT_R32G32_UINT:
1469                         return "uvec2";
1470                 case VK_FORMAT_R32G32B32_UINT:
1471                         return "uvec3";
1472                 case VK_FORMAT_R32G32B32A32_UINT:
1473                         return "uvec4";
1474                 case VK_FORMAT_R64_SINT:
1475                         return "int64_t";
1476                 case VK_FORMAT_R64G64_SINT:
1477                         return "i64vec2";
1478                 case VK_FORMAT_R64G64B64_SINT:
1479                         return "i64vec3";
1480                 case VK_FORMAT_R64G64B64A64_SINT:
1481                         return "i64vec4";
1482                 case VK_FORMAT_R64_UINT:
1483                         return "uint64_t";
1484                 case VK_FORMAT_R64G64_UINT:
1485                         return "u64vec2";
1486                 case VK_FORMAT_R64G64B64_UINT:
1487                         return "u64vec3";
1488                 case VK_FORMAT_R64G64B64A64_UINT:
1489                         return "u64vec4";
1490                 case VK_FORMAT_R16_SFLOAT:
1491                         return "float16_t";
1492                 case VK_FORMAT_R16G16_SFLOAT:
1493                         return "f16vec2";
1494                 case VK_FORMAT_R16G16B16_SFLOAT:
1495                         return "f16vec3";
1496                 case VK_FORMAT_R16G16B16A16_SFLOAT:
1497                         return "f16vec4";
1498                 case VK_FORMAT_R32_SFLOAT:
1499                         return "float";
1500                 case VK_FORMAT_R32G32_SFLOAT:
1501                         return "vec2";
1502                 case VK_FORMAT_R32G32B32_SFLOAT:
1503                         return "vec3";
1504                 case VK_FORMAT_R32G32B32A32_SFLOAT:
1505                         return "vec4";
1506                 case VK_FORMAT_R64_SFLOAT:
1507                         return "double";
1508                 case VK_FORMAT_R64G64_SFLOAT:
1509                         return "dvec2";
1510                 case VK_FORMAT_R64G64B64_SFLOAT:
1511                         return "dvec3";
1512                 case VK_FORMAT_R64G64B64A64_SFLOAT:
1513                         return "dvec4";
1514                 case VK_FORMAT_R8_USCALED:
1515                         return "bool";
1516                 case VK_FORMAT_R8G8_USCALED:
1517                         return "bvec2";
1518                 case VK_FORMAT_R8G8B8_USCALED:
1519                         return "bvec3";
1520                 case VK_FORMAT_R8G8B8A8_USCALED:
1521                         return "bvec4";
1522         }
1523 }
1524
1525 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1526 {
1527         switch (format)
1528         {
1529                 default:
1530                         return "";
1531                 case VK_FORMAT_R8_SINT:
1532                 case VK_FORMAT_R8G8_SINT:
1533                 case VK_FORMAT_R8G8B8_SINT:
1534                 case VK_FORMAT_R8G8B8A8_SINT:
1535                 case VK_FORMAT_R8_UINT:
1536                 case VK_FORMAT_R8G8_UINT:
1537                 case VK_FORMAT_R8G8B8_UINT:
1538                 case VK_FORMAT_R8G8B8A8_UINT:
1539                         return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1540                 case VK_FORMAT_R16_SINT:
1541                 case VK_FORMAT_R16G16_SINT:
1542                 case VK_FORMAT_R16G16B16_SINT:
1543                 case VK_FORMAT_R16G16B16A16_SINT:
1544                 case VK_FORMAT_R16_UINT:
1545                 case VK_FORMAT_R16G16_UINT:
1546                 case VK_FORMAT_R16G16B16_UINT:
1547                 case VK_FORMAT_R16G16B16A16_UINT:
1548                         return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1549                 case VK_FORMAT_R64_SINT:
1550                 case VK_FORMAT_R64G64_SINT:
1551                 case VK_FORMAT_R64G64B64_SINT:
1552                 case VK_FORMAT_R64G64B64A64_SINT:
1553                 case VK_FORMAT_R64_UINT:
1554                 case VK_FORMAT_R64G64_UINT:
1555                 case VK_FORMAT_R64G64B64_UINT:
1556                 case VK_FORMAT_R64G64B64A64_UINT:
1557                         return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1558                 case VK_FORMAT_R16_SFLOAT:
1559                 case VK_FORMAT_R16G16_SFLOAT:
1560                 case VK_FORMAT_R16G16B16_SFLOAT:
1561                 case VK_FORMAT_R16G16B16A16_SFLOAT:
1562                         return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1563         }
1564 }
1565
1566 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats()
1567 {
1568         std::vector<VkFormat> formats;
1569
1570         formats.push_back(VK_FORMAT_R8_SINT);
1571         formats.push_back(VK_FORMAT_R8G8_SINT);
1572         formats.push_back(VK_FORMAT_R8G8B8_SINT);
1573         formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1574         formats.push_back(VK_FORMAT_R8_UINT);
1575         formats.push_back(VK_FORMAT_R8G8_UINT);
1576         formats.push_back(VK_FORMAT_R8G8B8_UINT);
1577         formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1578         formats.push_back(VK_FORMAT_R16_SINT);
1579         formats.push_back(VK_FORMAT_R16G16_SINT);
1580         formats.push_back(VK_FORMAT_R16G16B16_SINT);
1581         formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1582         formats.push_back(VK_FORMAT_R16_UINT);
1583         formats.push_back(VK_FORMAT_R16G16_UINT);
1584         formats.push_back(VK_FORMAT_R16G16B16_UINT);
1585         formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1586         formats.push_back(VK_FORMAT_R32_SINT);
1587         formats.push_back(VK_FORMAT_R32G32_SINT);
1588         formats.push_back(VK_FORMAT_R32G32B32_SINT);
1589         formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
1590         formats.push_back(VK_FORMAT_R32_UINT);
1591         formats.push_back(VK_FORMAT_R32G32_UINT);
1592         formats.push_back(VK_FORMAT_R32G32B32_UINT);
1593         formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
1594         formats.push_back(VK_FORMAT_R64_SINT);
1595         formats.push_back(VK_FORMAT_R64G64_SINT);
1596         formats.push_back(VK_FORMAT_R64G64B64_SINT);
1597         formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
1598         formats.push_back(VK_FORMAT_R64_UINT);
1599         formats.push_back(VK_FORMAT_R64G64_UINT);
1600         formats.push_back(VK_FORMAT_R64G64B64_UINT);
1601         formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
1602         formats.push_back(VK_FORMAT_R16_SFLOAT);
1603         formats.push_back(VK_FORMAT_R16G16_SFLOAT);
1604         formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
1605         formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
1606         formats.push_back(VK_FORMAT_R32_SFLOAT);
1607         formats.push_back(VK_FORMAT_R32G32_SFLOAT);
1608         formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
1609         formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
1610         formats.push_back(VK_FORMAT_R64_SFLOAT);
1611         formats.push_back(VK_FORMAT_R64G64_SFLOAT);
1612         formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
1613         formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
1614         formats.push_back(VK_FORMAT_R8_USCALED);
1615         formats.push_back(VK_FORMAT_R8G8_USCALED);
1616         formats.push_back(VK_FORMAT_R8G8B8_USCALED);
1617         formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
1618
1619         return formats;
1620 }
1621
1622 bool vkt::subgroups::isFormatSigned (VkFormat format)
1623 {
1624         switch (format)
1625         {
1626                 default:
1627                         return false;
1628                 case VK_FORMAT_R8_SINT:
1629                 case VK_FORMAT_R8G8_SINT:
1630                 case VK_FORMAT_R8G8B8_SINT:
1631                 case VK_FORMAT_R8G8B8A8_SINT:
1632                 case VK_FORMAT_R16_SINT:
1633                 case VK_FORMAT_R16G16_SINT:
1634                 case VK_FORMAT_R16G16B16_SINT:
1635                 case VK_FORMAT_R16G16B16A16_SINT:
1636                 case VK_FORMAT_R32_SINT:
1637                 case VK_FORMAT_R32G32_SINT:
1638                 case VK_FORMAT_R32G32B32_SINT:
1639                 case VK_FORMAT_R32G32B32A32_SINT:
1640                 case VK_FORMAT_R64_SINT:
1641                 case VK_FORMAT_R64G64_SINT:
1642                 case VK_FORMAT_R64G64B64_SINT:
1643                 case VK_FORMAT_R64G64B64A64_SINT:
1644                         return true;
1645         }
1646 }
1647
1648 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
1649 {
1650         switch (format)
1651         {
1652                 default:
1653                         return false;
1654                 case VK_FORMAT_R8_UINT:
1655                 case VK_FORMAT_R8G8_UINT:
1656                 case VK_FORMAT_R8G8B8_UINT:
1657                 case VK_FORMAT_R8G8B8A8_UINT:
1658                 case VK_FORMAT_R16_UINT:
1659                 case VK_FORMAT_R16G16_UINT:
1660                 case VK_FORMAT_R16G16B16_UINT:
1661                 case VK_FORMAT_R16G16B16A16_UINT:
1662                 case VK_FORMAT_R32_UINT:
1663                 case VK_FORMAT_R32G32_UINT:
1664                 case VK_FORMAT_R32G32B32_UINT:
1665                 case VK_FORMAT_R32G32B32A32_UINT:
1666                 case VK_FORMAT_R64_UINT:
1667                 case VK_FORMAT_R64G64_UINT:
1668                 case VK_FORMAT_R64G64B64_UINT:
1669                 case VK_FORMAT_R64G64B64A64_UINT:
1670                         return true;
1671         }
1672 }
1673
1674 bool vkt::subgroups::isFormatFloat (VkFormat format)
1675 {
1676         switch (format)
1677         {
1678                 default:
1679                         return false;
1680                 case VK_FORMAT_R16_SFLOAT:
1681                 case VK_FORMAT_R16G16_SFLOAT:
1682                 case VK_FORMAT_R16G16B16_SFLOAT:
1683                 case VK_FORMAT_R16G16B16A16_SFLOAT:
1684                 case VK_FORMAT_R32_SFLOAT:
1685                 case VK_FORMAT_R32G32_SFLOAT:
1686                 case VK_FORMAT_R32G32B32_SFLOAT:
1687                 case VK_FORMAT_R32G32B32A32_SFLOAT:
1688                 case VK_FORMAT_R64_SFLOAT:
1689                 case VK_FORMAT_R64G64_SFLOAT:
1690                 case VK_FORMAT_R64G64B64_SFLOAT:
1691                 case VK_FORMAT_R64G64B64A64_SFLOAT:
1692                         return true;
1693         }
1694 }
1695
1696 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
1697 {
1698         /*
1699                 "layout(location = 0) in highp vec4 in_position;\n"
1700                 "void main (void)\n"
1701                 "{\n"
1702                 "  gl_Position = in_position;\n"
1703                 "  gl_PointSize = 1.0f;\n"
1704                 "}\n";
1705         */
1706         programCollection.spirvAsmSources.add("vert") <<
1707                 "; SPIR-V\n"
1708                 "; Version: 1.3\n"
1709                 "; Generator: Khronos Glslang Reference Front End; 7\n"
1710                 "; Bound: 25\n"
1711                 "; Schema: 0\n"
1712                 "OpCapability Shader\n"
1713                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1714                 "OpMemoryModel Logical GLSL450\n"
1715                 "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
1716                 "OpMemberDecorate %11 0 BuiltIn Position\n"
1717                 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1718                 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1719                 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1720                 "OpDecorate %11 Block\n"
1721                 "OpDecorate %17 Location 0\n"
1722                 "%2 = OpTypeVoid\n"
1723                 "%3 = OpTypeFunction %2\n"
1724                 "%6 = OpTypeFloat 32\n"
1725                 "%7 = OpTypeVector %6 4\n"
1726                 "%8 = OpTypeInt 32 0\n"
1727                 "%9 = OpConstant %8 1\n"
1728                 "%10 = OpTypeArray %6 %9\n"
1729                 "%11 = OpTypeStruct %7 %6 %10 %10\n"
1730                 "%12 = OpTypePointer Output %11\n"
1731                 "%13 = OpVariable %12 Output\n"
1732                 "%14 = OpTypeInt 32 1\n"
1733                 "%15 = OpConstant %14 0\n"
1734                 "%16 = OpTypePointer Input %7\n"
1735                 "%17 = OpVariable %16 Input\n"
1736                 "%19 = OpTypePointer Output %7\n"
1737                 "%21 = OpConstant %14 1\n"
1738                 "%22 = OpConstant %6 1\n"
1739                 "%23 = OpTypePointer Output %6\n"
1740                 "%4 = OpFunction %2 None %3\n"
1741                 "%5 = OpLabel\n"
1742                 "%18 = OpLoad %7 %17\n"
1743                 "%20 = OpAccessChain %19 %13 %15\n"
1744                 "OpStore %20 %18\n"
1745                 "%24 = OpAccessChain %23 %13 %21\n"
1746                 "OpStore %24 %22\n"
1747                 "OpReturn\n"
1748                 "OpFunctionEnd\n";
1749 }
1750
1751 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
1752 {
1753         /*
1754                 "layout(location = 0) in float in_color;\n"
1755                 "layout(location = 0) out uint out_color;\n"
1756                 "void main()\n"
1757                 {\n"
1758                 "       out_color = uint(in_color);\n"
1759                 "}\n";
1760         */
1761         programCollection.spirvAsmSources.add("fragment") <<
1762                 "; SPIR-V\n"
1763                 "; Version: 1.3\n"
1764                 "; Generator: Khronos Glslang Reference Front End; 2\n"
1765                 "; Bound: 14\n"
1766                 "; Schema: 0\n"
1767                 "OpCapability Shader\n"
1768                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1769                 "OpMemoryModel Logical GLSL450\n"
1770                 "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
1771                 "OpExecutionMode %4 OriginUpperLeft\n"
1772                 "OpDecorate %8 Location 0\n"
1773                 "OpDecorate %11 Location 0\n"
1774                 "%2 = OpTypeVoid\n"
1775                 "%3 = OpTypeFunction %2\n"
1776                 "%6 = OpTypeInt 32 0\n"
1777                 "%7 = OpTypePointer Output %6\n"
1778                 "%8 = OpVariable %7 Output\n"
1779                 "%9 = OpTypeFloat 32\n"
1780                 "%10 = OpTypePointer Input %9\n"
1781                 "%11 = OpVariable %10 Input\n"
1782                 "%4 = OpFunction %2 None %3\n"
1783                 "%5 = OpLabel\n"
1784                 "%12 = OpLoad %9 %11\n"
1785                 "%13 = OpConvertFToU %6 %12\n"
1786                 "OpStore %8 %13\n"
1787                 "OpReturn\n"
1788                 "OpFunctionEnd\n";
1789 }
1790
1791 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
1792 {
1793         /*
1794                 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1795                 "#extension GL_EXT_tessellation_shader : require\n"
1796                 "layout(vertices = 2) out;\n"
1797                 "void main (void)\n"
1798                 "{\n"
1799                 "  if (gl_InvocationID == 0)\n"
1800                 "  {\n"
1801                 "    gl_TessLevelOuter[0] = 1.0f;\n"
1802                 "    gl_TessLevelOuter[1] = 1.0f;\n"
1803                 "  }\n"
1804                 "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1805                 "}\n";
1806         */
1807         programCollection.spirvAsmSources.add("tesc") <<
1808                 "; SPIR-V\n"
1809                 "; Version: 1.3\n"
1810                 "; Generator: Khronos Glslang Reference Front End; 2\n"
1811                 "; Bound: 46\n"
1812                 "; Schema: 0\n"
1813                 "OpCapability Tessellation\n"
1814                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1815                 "OpMemoryModel Logical GLSL450\n"
1816                 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
1817                 "OpExecutionMode %4 OutputVertices 2\n"
1818                 "OpDecorate %8 BuiltIn InvocationId\n"
1819                 "OpDecorate %20 Patch\n"
1820                 "OpDecorate %20 BuiltIn TessLevelOuter\n"
1821                 "OpMemberDecorate %29 0 BuiltIn Position\n"
1822                 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1823                 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1824                 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1825                 "OpDecorate %29 Block\n"
1826                 "OpMemberDecorate %35 0 BuiltIn Position\n"
1827                 "OpMemberDecorate %35 1 BuiltIn PointSize\n"
1828                 "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
1829                 "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
1830                 "OpDecorate %35 Block\n"
1831                 "%2 = OpTypeVoid\n"
1832                 "%3 = OpTypeFunction %2\n"
1833                 "%6 = OpTypeInt 32 1\n"
1834                 "%7 = OpTypePointer Input %6\n"
1835                 "%8 = OpVariable %7 Input\n"
1836                 "%10 = OpConstant %6 0\n"
1837                 "%11 = OpTypeBool\n"
1838                 "%15 = OpTypeFloat 32\n"
1839                 "%16 = OpTypeInt 32 0\n"
1840                 "%17 = OpConstant %16 4\n"
1841                 "%18 = OpTypeArray %15 %17\n"
1842                 "%19 = OpTypePointer Output %18\n"
1843                 "%20 = OpVariable %19 Output\n"
1844                 "%21 = OpConstant %15 1\n"
1845                 "%22 = OpTypePointer Output %15\n"
1846                 "%24 = OpConstant %6 1\n"
1847                 "%26 = OpTypeVector %15 4\n"
1848                 "%27 = OpConstant %16 1\n"
1849                 "%28 = OpTypeArray %15 %27\n"
1850                 "%29 = OpTypeStruct %26 %15 %28 %28\n"
1851                 "%30 = OpConstant %16 2\n"
1852                 "%31 = OpTypeArray %29 %30\n"
1853                 "%32 = OpTypePointer Output %31\n"
1854                 "%33 = OpVariable %32 Output\n"
1855                 "%35 = OpTypeStruct %26 %15 %28 %28\n"
1856                 "%36 = OpConstant %16 32\n"
1857                 "%37 = OpTypeArray %35 %36\n"
1858                 "%38 = OpTypePointer Input %37\n"
1859                 "%39 = OpVariable %38 Input\n"
1860                 "%41 = OpTypePointer Input %26\n"
1861                 "%44 = OpTypePointer Output %26\n"
1862                 "%4 = OpFunction %2 None %3\n"
1863                 "%5 = OpLabel\n"
1864                 "%9 = OpLoad %6 %8\n"
1865                 "%12 = OpIEqual %11 %9 %10\n"
1866                 "OpSelectionMerge %14 None\n"
1867                 "OpBranchConditional %12 %13 %14\n"
1868                 "%13 = OpLabel\n"
1869                 "%23 = OpAccessChain %22 %20 %10\n"
1870                 "OpStore %23 %21\n"
1871                 "%25 = OpAccessChain %22 %20 %24\n"
1872                 "OpStore %25 %21\n"
1873                 "OpBranch %14\n"
1874                 "%14 = OpLabel\n"
1875                 "%34 = OpLoad %6 %8\n"
1876                 "%40 = OpLoad %6 %8\n"
1877                 "%42 = OpAccessChain %41 %39 %40 %10\n"
1878                 "%43 = OpLoad %26 %42\n"
1879                 "%45 = OpAccessChain %44 %33 %34 %10\n"
1880                 "OpStore %45 %43\n"
1881                 "OpReturn\n"
1882                 "OpFunctionEnd\n";
1883 }
1884
1885 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
1886 {
1887         /*
1888                 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1889                 "#extension GL_EXT_tessellation_shader : require\n"
1890                 "layout(isolines, equal_spacing, ccw ) in;\n"
1891                 "layout(location = 0) in float in_color[];\n"
1892                 "layout(location = 0) out float out_color;\n"
1893                 "\n"
1894                 "void main (void)\n"
1895                 "{\n"
1896                 "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1897                 "  out_color = in_color[0];\n"
1898                 "}\n";
1899         */
1900         programCollection.spirvAsmSources.add("tese") <<
1901                 "; SPIR-V\n"
1902                 "; Version: 1.3\n"
1903                 "; Generator: Khronos Glslang Reference Front End; 2\n"
1904                 "; Bound: 45\n"
1905                 "; Schema: 0\n"
1906                 "OpCapability Tessellation\n"
1907                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1908                 "OpMemoryModel Logical GLSL450\n"
1909                 "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
1910                 "OpExecutionMode %4 Isolines\n"
1911                 "OpExecutionMode %4 SpacingEqual\n"
1912                 "OpExecutionMode %4 VertexOrderCcw\n"
1913                 "OpMemberDecorate %11 0 BuiltIn Position\n"
1914                 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1915                 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1916                 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1917                 "OpDecorate %11 Block\n"
1918                 "OpMemberDecorate %16 0 BuiltIn Position\n"
1919                 "OpMemberDecorate %16 1 BuiltIn PointSize\n"
1920                 "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
1921                 "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
1922                 "OpDecorate %16 Block\n"
1923                 "OpDecorate %29 BuiltIn TessCoord\n"
1924                 "OpDecorate %39 Location 0\n"
1925                 "OpDecorate %42 Location 0\n"
1926                 "%2 = OpTypeVoid\n"
1927                 "%3 = OpTypeFunction %2\n"
1928                 "%6 = OpTypeFloat 32\n"
1929                 "%7 = OpTypeVector %6 4\n"
1930                 "%8 = OpTypeInt 32 0\n"
1931                 "%9 = OpConstant %8 1\n"
1932                 "%10 = OpTypeArray %6 %9\n"
1933                 "%11 = OpTypeStruct %7 %6 %10 %10\n"
1934                 "%12 = OpTypePointer Output %11\n"
1935                 "%13 = OpVariable %12 Output\n"
1936                 "%14 = OpTypeInt 32 1\n"
1937                 "%15 = OpConstant %14 0\n"
1938                 "%16 = OpTypeStruct %7 %6 %10 %10\n"
1939                 "%17 = OpConstant %8 32\n"
1940                 "%18 = OpTypeArray %16 %17\n"
1941                 "%19 = OpTypePointer Input %18\n"
1942                 "%20 = OpVariable %19 Input\n"
1943                 "%21 = OpTypePointer Input %7\n"
1944                 "%24 = OpConstant %14 1\n"
1945                 "%27 = OpTypeVector %6 3\n"
1946                 "%28 = OpTypePointer Input %27\n"
1947                 "%29 = OpVariable %28 Input\n"
1948                 "%30 = OpConstant %8 0\n"
1949                 "%31 = OpTypePointer Input %6\n"
1950                 "%36 = OpTypePointer Output %7\n"
1951                 "%38 = OpTypePointer Output %6\n"
1952                 "%39 = OpVariable %38 Output\n"
1953                 "%40 = OpTypeArray %6 %17\n"
1954                 "%41 = OpTypePointer Input %40\n"
1955                 "%42 = OpVariable %41 Input\n"
1956                 "%4 = OpFunction %2 None %3\n"
1957                 "%5 = OpLabel\n"
1958                 "%22 = OpAccessChain %21 %20 %15 %15\n"
1959                 "%23 = OpLoad %7 %22\n"
1960                 "%25 = OpAccessChain %21 %20 %24 %15\n"
1961                 "%26 = OpLoad %7 %25\n"
1962                 "%32 = OpAccessChain %31 %29 %30\n"
1963                 "%33 = OpLoad %6 %32\n"
1964                 "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
1965                 "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
1966                 "%37 = OpAccessChain %36 %13 %15\n"
1967                 "OpStore %37 %35\n"
1968                 "%43 = OpAccessChain %31 %42 %15\n"
1969                 "%44 = OpLoad %6 %43\n"
1970                 "OpStore %39 %44\n"
1971                 "OpReturn\n"
1972                 "OpFunctionEnd\n";
1973 }
1974
1975 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options,  vk::GlslSourceCollection& collection)
1976 {
1977         tcu::StringTemplate geometryTemplate(glslTemplate);
1978
1979         map<string, string>             linesParams;
1980         linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
1981
1982         map<string, string>             pointsParams;
1983         pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
1984
1985         collection.add("geometry_lines")        << glu::GeometrySource(geometryTemplate.specialize(linesParams))        << options;
1986         collection.add("geometry_points")       << glu::GeometrySource(geometryTemplate.specialize(pointsParams))       << options;
1987 }
1988
1989 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
1990 {
1991         tcu::StringTemplate geometryTemplate(spirvTemplate);
1992
1993         map<string, string>             linesParams;
1994         linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
1995
1996         map<string, string>             pointsParams;
1997         pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
1998
1999         collection.add("geometry_lines")        << geometryTemplate.specialize(linesParams)             << options;
2000         collection.add("geometry_points")       << geometryTemplate.specialize(pointsParams)    << options;
2001 }
2002
2003 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
2004 {
2005         const vk::VkFormat format = data.format;
2006         const vk::VkDeviceSize size = data.numElements *
2007                 (data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2008         if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
2009         {
2010                 de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2011
2012                 switch (format)
2013                 {
2014                         default:
2015                                 DE_FATAL("Illegal buffer format");
2016                                 break;
2017                         case VK_FORMAT_R8_SINT:
2018                         case VK_FORMAT_R8G8_SINT:
2019                         case VK_FORMAT_R8G8B8_SINT:
2020                         case VK_FORMAT_R8G8B8A8_SINT:
2021                         case VK_FORMAT_R8_UINT:
2022                         case VK_FORMAT_R8G8_UINT:
2023                         case VK_FORMAT_R8G8B8_UINT:
2024                         case VK_FORMAT_R8G8B8A8_UINT:
2025                         {
2026                                 deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
2027
2028                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
2029                                 {
2030                                         ptr[k] = rnd.getUint8();
2031                                 }
2032                         }
2033                         break;
2034                         case VK_FORMAT_R16_SINT:
2035                         case VK_FORMAT_R16G16_SINT:
2036                         case VK_FORMAT_R16G16B16_SINT:
2037                         case VK_FORMAT_R16G16B16A16_SINT:
2038                         case VK_FORMAT_R16_UINT:
2039                         case VK_FORMAT_R16G16_UINT:
2040                         case VK_FORMAT_R16G16B16_UINT:
2041                         case VK_FORMAT_R16G16B16A16_UINT:
2042                         {
2043                                 deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
2044
2045                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
2046                                 {
2047                                         ptr[k] = rnd.getUint16();
2048                                 }
2049                         }
2050                         break;
2051                         case VK_FORMAT_R8_USCALED:
2052                         case VK_FORMAT_R8G8_USCALED:
2053                         case VK_FORMAT_R8G8B8_USCALED:
2054                         case VK_FORMAT_R8G8B8A8_USCALED:
2055                         {
2056                                 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2057
2058                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2059                                 {
2060                                         deUint32 r = rnd.getUint32();
2061                                         ptr[k] = (r & 1) ? r : 0;
2062                                 }
2063                         }
2064                         break;
2065                         case VK_FORMAT_R32_SINT:
2066                         case VK_FORMAT_R32G32_SINT:
2067                         case VK_FORMAT_R32G32B32_SINT:
2068                         case VK_FORMAT_R32G32B32A32_SINT:
2069                         case VK_FORMAT_R32_UINT:
2070                         case VK_FORMAT_R32G32_UINT:
2071                         case VK_FORMAT_R32G32B32_UINT:
2072                         case VK_FORMAT_R32G32B32A32_UINT:
2073                         {
2074                                 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2075
2076                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2077                                 {
2078                                         ptr[k] = rnd.getUint32();
2079                                 }
2080                         }
2081                         break;
2082                         case VK_FORMAT_R64_SINT:
2083                         case VK_FORMAT_R64G64_SINT:
2084                         case VK_FORMAT_R64G64B64_SINT:
2085                         case VK_FORMAT_R64G64B64A64_SINT:
2086                         case VK_FORMAT_R64_UINT:
2087                         case VK_FORMAT_R64G64_UINT:
2088                         case VK_FORMAT_R64G64B64_UINT:
2089                         case VK_FORMAT_R64G64B64A64_UINT:
2090                         {
2091                                 deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
2092
2093                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
2094                                 {
2095                                         ptr[k] = rnd.getUint64();
2096                                 }
2097                         }
2098                         break;
2099                         case VK_FORMAT_R16_SFLOAT:
2100                         case VK_FORMAT_R16G16_SFLOAT:
2101                         case VK_FORMAT_R16G16B16_SFLOAT:
2102                         case VK_FORMAT_R16G16B16A16_SFLOAT:
2103                         {
2104                                 deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
2105
2106                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
2107                                 {
2108                                         ptr[k] = deFloat32To16(rnd.getFloat());
2109                                 }
2110                         }
2111                         break;
2112                         case VK_FORMAT_R32_SFLOAT:
2113                         case VK_FORMAT_R32G32_SFLOAT:
2114                         case VK_FORMAT_R32G32B32_SFLOAT:
2115                         case VK_FORMAT_R32G32B32A32_SFLOAT:
2116                         {
2117                                 float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
2118
2119                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2120                                 {
2121                                         ptr[k] = rnd.getFloat();
2122                                 }
2123                         }
2124                         break;
2125                         case VK_FORMAT_R64_SFLOAT:
2126                         case VK_FORMAT_R64G64_SFLOAT:
2127                         case VK_FORMAT_R64G64B64_SFLOAT:
2128                         case VK_FORMAT_R64G64B64A64_SFLOAT:
2129                         {
2130                                 double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
2131
2132                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2133                                 {
2134                                         ptr[k] = rnd.getDouble();
2135                                 }
2136                         }
2137                         break;
2138                 }
2139         }
2140         else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2141         {
2142                 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2143
2144                 for (vk::VkDeviceSize k = 0; k < size / 4; k++)
2145                 {
2146                         ptr[k] = 0;
2147                 }
2148         }
2149
2150         if (subgroups::SSBOData::InitializeNone != data.initializeType)
2151         {
2152                 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2153         }
2154 }
2155
2156 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2157 {
2158         switch(shaderStage)
2159         {
2160                 case VK_SHADER_STAGE_VERTEX_BIT:
2161                         return 0u;
2162                         break;
2163                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2164                         return 1u;
2165                         break;
2166                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2167                         return 2u;
2168                         break;
2169                 case VK_SHADER_STAGE_GEOMETRY_BIT:
2170                         return 3u;
2171                         break;
2172                 default:
2173                         DE_ASSERT(0);
2174                         return -1;
2175         }
2176         DE_ASSERT(0);
2177         return -1;
2178 }
2179
2180 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (
2181         Context& context, VkFormat format, SSBOData* extraData,
2182         deUint32 extraDataCount,
2183         bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2184         const VkShaderStageFlags shaderStage)
2185 {
2186         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
2187         const VkDevice                                                  device                                  = context.getDevice();
2188         const deUint32                                                  maxWidth                                = getMaxWidth();
2189         vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
2190         DescriptorSetLayoutBuilder                              layoutBuilder;
2191         DescriptorPoolBuilder                                   poolBuilder;
2192         DescriptorSetUpdateBuilder                              updateBuilder;
2193         Move <VkDescriptorPool>                                 descriptorPool;
2194         Move <VkDescriptorSet>                                  descriptorSet;
2195
2196         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(vk, device,
2197                                                                                                                                                 context.getBinaryCollection().get("vert"), 0u));
2198         const Unique<VkShaderModule>                    teCtrlShaderModule              (createShaderModule(vk, device,
2199                                                                                                                                                 context.getBinaryCollection().get("tesc"), 0u));
2200         const Unique<VkShaderModule>                    teEvalShaderModule              (createShaderModule(vk, device,
2201                                                                                                                                                 context.getBinaryCollection().get("tese"), 0u));
2202         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(vk, device,
2203                                                                                                                                         context.getBinaryCollection().get("fragment"), 0u));
2204         const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
2205
2206         const VkVertexInputBindingDescription   vertexInputBinding              =
2207         {
2208                 0u,                                                                                     // binding;
2209                 static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
2210                 VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
2211         };
2212
2213         const VkVertexInputAttributeDescription vertexInputAttribute    =
2214         {
2215                 0u,
2216                 0u,
2217                 VK_FORMAT_R32G32B32A32_SFLOAT,
2218                 0u
2219         };
2220
2221         for (deUint32 i = 0u; i < extraDataCount; i++)
2222         {
2223                 if (extraData[i].isImage)
2224                 {
2225                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2226                 }
2227                 else
2228                 {
2229                         vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2230                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2231                 }
2232                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2233                 initializeMemory(context, alloc, extraData[i]);
2234         }
2235
2236         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2237                 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
2238
2239         const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(vk, device));
2240
2241         const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(vk, device, *descriptorSetLayout));
2242
2243         const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
2244                                                                                                                                         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
2245                                                                                                                                         VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2246                                                                                                                                         *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
2247                                                                                                                                         *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
2248
2249         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2250                 poolBuilder.addType(inputBuffers[ndx]->getType());
2251
2252         if (extraDataCount > 0)
2253         {
2254                 descriptorPool = poolBuilder.build(vk, device,
2255                                                         VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2256                 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2257         }
2258
2259         for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2260         {
2261                 if (inputBuffers[buffersNdx]->isImage())
2262                 {
2263                         VkDescriptorImageInfo info =
2264                                 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2265                                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2266
2267                         updateBuilder.writeSingle(*descriptorSet,
2268                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2269                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2270                 }
2271                 else
2272                 {
2273                         VkDescriptorBufferInfo info =
2274                                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2275                                                                                 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2276
2277                         updateBuilder.writeSingle(*descriptorSet,
2278                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2279                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2280                 }
2281         }
2282
2283         updateBuilder.update(vk, device);
2284
2285         const VkQueue                                                   queue                                   = context.getUniversalQueue();
2286         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
2287         const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(vk, device, queueFamilyIndex));
2288         const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
2289         const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
2290         const vk::VkDeviceSize                                  vertexBufferSize                = 2ull * maxWidth * sizeof(tcu::Vec4);
2291         Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2292         unsigned                                                                totalIterations                 = 0u;
2293         unsigned                                                                failedIterations                = 0u;
2294         Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2295
2296         {
2297                 const Allocation&               alloc                           = vertexBuffer.getAllocation();
2298                 std::vector<tcu::Vec4>  data                            (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
2299                 const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
2300                 float                                   leftHandPosition        = -1.0f;
2301
2302                 for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
2303                 {
2304                         data[ndx][0] = leftHandPosition;
2305                         leftHandPosition += pixelSize;
2306                         data[ndx+1][0] = leftHandPosition;
2307                 }
2308
2309                 deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
2310                 flushAlloc(vk, device, alloc);
2311         }
2312
2313         const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2314         const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
2315         const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
2316         const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2317         Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2318         const VkDeviceSize                      vertexBufferOffset      = 0u;
2319
2320         for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2321         {
2322                 totalIterations++;
2323
2324                 beginCommandBuffer(vk, *cmdBuffer);
2325                 {
2326
2327                         vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2328                         vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2329
2330                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2331
2332                         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2333
2334                         if (extraDataCount > 0)
2335                         {
2336                                 vk.cmdBindDescriptorSets(*cmdBuffer,
2337                                         VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2338                                         &descriptorSet.get(), 0u, DE_NULL);
2339                         }
2340
2341                         vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2342                         vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
2343
2344                         endRenderPass(vk, *cmdBuffer);
2345
2346                         copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2347                         endCommandBuffer(vk, *cmdBuffer);
2348
2349                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2350                 }
2351
2352                 {
2353                         const Allocation& allocResult = imageBufferResult.getAllocation();
2354                         invalidateAlloc(vk, device, allocResult);
2355
2356                         std::vector<const void*> datas;
2357                         datas.push_back(allocResult.getHostPtr());
2358                         if (!checkResult(datas, width/2u, subgroupSize))
2359                                 failedIterations++;
2360                 }
2361         }
2362
2363         if (0 < failedIterations)
2364         {
2365                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2366
2367                 context.getTestContext().getLog()
2368                                 << TestLog::Message << valuesPassed << " / "
2369                                 << totalIterations << " values passed" << TestLog::EndMessage;
2370                 return tcu::TestStatus::fail("Failed!");
2371         }
2372
2373         return tcu::TestStatus::pass("OK");
2374 }
2375
2376 bool vkt::subgroups::check(std::vector<const void*> datas,
2377         deUint32 width, deUint32 ref)
2378 {
2379         const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2380
2381         for (deUint32 n = 0; n < width; ++n)
2382         {
2383                 if (data[n] != ref)
2384                 {
2385                         return false;
2386                 }
2387         }
2388
2389         return true;
2390 }
2391
2392 bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
2393         const deUint32 numWorkgroups[3], const deUint32 localSize[3],
2394         deUint32 ref)
2395 {
2396         const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2397         const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2398         const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2399
2400         return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2401 }
2402
2403 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
2404         Context& context, VkFormat format, SSBOData* extraData,
2405         deUint32 extraDataCount,
2406         bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2407 {
2408         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
2409         const VkDevice                                                  device                                  = context.getDevice();
2410         const deUint32                                                  maxWidth                                = getMaxWidth();
2411         vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
2412         DescriptorSetLayoutBuilder                              layoutBuilder;
2413         DescriptorPoolBuilder                                   poolBuilder;
2414         DescriptorSetUpdateBuilder                              updateBuilder;
2415         Move <VkDescriptorPool>                                 descriptorPool;
2416         Move <VkDescriptorSet>                                  descriptorSet;
2417
2418         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2419         const Unique<VkShaderModule>                    geometryShaderModule    (createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2420         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2421         const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
2422         const VkVertexInputBindingDescription   vertexInputBinding              =
2423         {
2424                 0u,                                                                                     // binding;
2425                 static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
2426                 VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
2427         };
2428
2429         const VkVertexInputAttributeDescription vertexInputAttribute    =
2430         {
2431                 0u,
2432                 0u,
2433                 VK_FORMAT_R32G32B32A32_SFLOAT,
2434                 0u
2435         };
2436
2437         for (deUint32 i = 0u; i < extraDataCount; i++)
2438         {
2439                 if (extraData[i].isImage)
2440                 {
2441                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2442                 }
2443                 else
2444                 {
2445                         vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2446                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2447                 }
2448                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2449                 initializeMemory(context, alloc, extraData[i]);
2450         }
2451
2452         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2453                 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2454
2455         const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(vk, device));
2456
2457         const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(vk, device, *descriptorSetLayout));
2458
2459         const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
2460                                                                                                                                         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
2461                                                                                                                                         *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
2462                                                                                                                                         *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
2463
2464         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2465                 poolBuilder.addType(inputBuffers[ndx]->getType());
2466
2467         if (extraDataCount > 0)
2468         {
2469                 descriptorPool = poolBuilder.build(vk, device,
2470                                                         VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2471                 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2472         }
2473
2474         for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2475         {
2476                 if (inputBuffers[buffersNdx]->isImage())
2477                 {
2478                         VkDescriptorImageInfo info =
2479                                 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2480                                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2481
2482                         updateBuilder.writeSingle(*descriptorSet,
2483                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2484                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2485                 }
2486                 else
2487                 {
2488                         VkDescriptorBufferInfo info =
2489                                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2490                                                                                 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2491
2492                         updateBuilder.writeSingle(*descriptorSet,
2493                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2494                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2495                 }
2496         }
2497
2498         updateBuilder.update(vk, device);
2499
2500         const VkQueue                                                   queue                                   = context.getUniversalQueue();
2501         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
2502         const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(vk, device, queueFamilyIndex));
2503         const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
2504         const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
2505         const vk::VkDeviceSize                                  vertexBufferSize                = maxWidth * sizeof(tcu::Vec4);
2506         Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2507         unsigned                                                                totalIterations                 = 0u;
2508         unsigned                                                                failedIterations                = 0u;
2509         Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2510
2511         {
2512                 const Allocation&               alloc                           = vertexBuffer.getAllocation();
2513                 std::vector<tcu::Vec4>  data                            (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2514                 const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
2515                 float                                   leftHandPosition        = -1.0f;
2516
2517                 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2518                 {
2519                         data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2520                         leftHandPosition += pixelSize;
2521                 }
2522
2523                 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2524                 flushAlloc(vk, device, alloc);
2525         }
2526
2527         const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2528         const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
2529         const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
2530         const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2531         Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2532         const VkDeviceSize                      vertexBufferOffset      = 0u;
2533
2534         for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2535         {
2536                 totalIterations++;
2537
2538                 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2539                 {
2540                         const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2541                         initializeMemory(context, alloc, extraData[ndx]);
2542                 }
2543
2544                 beginCommandBuffer(vk, *cmdBuffer);
2545                 {
2546                         vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2547
2548                         vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2549
2550                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2551
2552                         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2553
2554                         if (extraDataCount > 0)
2555                         {
2556                                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2557                                         &descriptorSet.get(), 0u, DE_NULL);
2558                         }
2559
2560                         vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2561
2562                         vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2563
2564                         endRenderPass(vk, *cmdBuffer);
2565
2566                         copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2567
2568                         endCommandBuffer(vk, *cmdBuffer);
2569
2570                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2571                 }
2572
2573                 {
2574                         const Allocation& allocResult = imageBufferResult.getAllocation();
2575                         invalidateAlloc(vk, device, allocResult);
2576
2577                         std::vector<const void*> datas;
2578                         datas.push_back(allocResult.getHostPtr());
2579                         if (!checkResult(datas, width, subgroupSize))
2580                                 failedIterations++;
2581                 }
2582         }
2583
2584         if (0 < failedIterations)
2585         {
2586                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2587
2588                 context.getTestContext().getLog()
2589                                 << TestLog::Message << valuesPassed << " / "
2590                                 << totalIterations << " values passed" << TestLog::EndMessage;
2591
2592                 return tcu::TestStatus::fail("Failed!");
2593         }
2594
2595         return tcu::TestStatus::pass("OK");
2596 }
2597
2598
2599 tcu::TestStatus vkt::subgroups::allStages(
2600         Context& context, VkFormat format, SSBOData* extraDatas,
2601         deUint32 extraDatasCount,
2602         bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2603         const VkShaderStageFlags shaderStageTested)
2604 {
2605         const DeviceInterface&                  vk                                      = context.getDeviceInterface();
2606         const VkDevice                                  device                          = context.getDevice();
2607         const deUint32                                  maxWidth                        = getMaxWidth();
2608         vector<VkShaderStageFlagBits>   stagesVector;
2609         VkShaderStageFlags                              shaderStageRequired     = (VkShaderStageFlags)0ull;
2610
2611         Move<VkShaderModule>                    vertexShaderModule;
2612         Move<VkShaderModule>                    teCtrlShaderModule;
2613         Move<VkShaderModule>                    teEvalShaderModule;
2614         Move<VkShaderModule>                    geometryShaderModule;
2615         Move<VkShaderModule>                    fragmentShaderModule;
2616
2617         if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
2618         {
2619                 stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
2620         }
2621         if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
2622         {
2623                 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
2624                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
2625                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
2626         }
2627         if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2628         {
2629                 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
2630                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
2631                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2632         }
2633         if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
2634         {
2635                 stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
2636                 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
2637                 shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
2638         }
2639         if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
2640         {
2641                 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
2642                 shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
2643         }
2644
2645         const deUint32  stagesCount     = static_cast<deUint32>(stagesVector.size());
2646         const string    vert            = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)                                    ? "vert_noSubgroup"             : "vert";
2647         const string    tesc            = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)              ? "tesc_noSubgroup"             : "tesc";
2648         const string    tese            = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)   ? "tese_noSubgroup"             : "tese";
2649
2650         shaderStageRequired = shaderStageTested | shaderStageRequired;
2651
2652         vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
2653         if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
2654         {
2655                 teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
2656                 teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
2657         }
2658         if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
2659         {
2660                 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2661                 {
2662                         // tessellation shaders output line primitives
2663                         geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
2664                 }
2665                 else
2666                 {
2667                         // otherwise points are processed by geometry shader
2668                         geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
2669                 }
2670         }
2671         if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
2672                 fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
2673
2674         std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
2675
2676         DescriptorSetLayoutBuilder layoutBuilder;
2677         // The implicit result SSBO we use to store our outputs from the shader
2678         for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
2679         {
2680                 const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
2681                 const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
2682                 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2683
2684                 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
2685         }
2686
2687         for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
2688         {
2689                 const deUint32 datasNdx = ndx - stagesCount;
2690                 if (extraDatas[datasNdx].isImage)
2691                 {
2692                         inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
2693                 }
2694                 else
2695                 {
2696                         const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
2697                         inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2698                 }
2699
2700                 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2701                 initializeMemory(context, alloc, extraDatas[datasNdx]);
2702
2703                 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
2704                                                                 extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
2705         }
2706
2707         const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
2708
2709         const Unique<VkPipelineLayout> pipelineLayout(
2710                 makePipelineLayout(vk, device, *descriptorSetLayout));
2711
2712         const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2713         const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
2714                                                                                 shaderStageRequired,
2715                                                                                 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
2716                                                                                 *renderPass,
2717                                                                                 (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
2718
2719         Move <VkDescriptorPool> descriptorPool;
2720         Move <VkDescriptorSet>  descriptorSet;
2721
2722         if (inputBuffers.size() > 0)
2723         {
2724                 DescriptorPoolBuilder poolBuilder;
2725
2726                 for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
2727                 {
2728                         poolBuilder.addType(inputBuffers[ndx]->getType());
2729                 }
2730
2731                 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2732
2733                 // Create descriptor set
2734                 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2735
2736                 DescriptorSetUpdateBuilder updateBuilder;
2737
2738                 for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
2739                 {
2740                         deUint32 binding;
2741                         if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
2742                         else binding = extraDatas[ndx -stagesCount].binding;
2743
2744                         if (inputBuffers[ndx]->isImage())
2745                         {
2746                                 VkDescriptorImageInfo info =
2747                                         makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
2748                                                                                         inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2749
2750                                 updateBuilder.writeSingle(      *descriptorSet,
2751                                                                                         DescriptorSetUpdateBuilder::Location::binding(binding),
2752                                                                                         inputBuffers[ndx]->getType(), &info);
2753                         }
2754                         else
2755                         {
2756                                 VkDescriptorBufferInfo info =
2757                                         makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
2758                                                         0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
2759
2760                                 updateBuilder.writeSingle(      *descriptorSet,
2761                                                                                                         DescriptorSetUpdateBuilder::Location::binding(binding),
2762                                                                                                         inputBuffers[ndx]->getType(), &info);
2763                         }
2764                 }
2765
2766                 updateBuilder.update(vk, device);
2767         }
2768
2769         {
2770                 const VkQueue                                   queue                                   = context.getUniversalQueue();
2771                 const deUint32                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
2772                 const Unique<VkCommandPool>             cmdPool                                 (makeCommandPool(vk, device, queueFamilyIndex));
2773                 const deUint32                                  subgroupSize                    = getSubgroupSize(context);
2774                 const Unique<VkCommandBuffer>   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
2775                 unsigned                                                totalIterations                 = 0u;
2776                 unsigned                                                failedIterations                = 0u;
2777                 Image                                                   resultImage                             (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2778                 const Unique<VkFramebuffer>             framebuffer                             (makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
2779                 const VkViewport                                viewport                                = makeViewport(maxWidth, 1u);
2780                 const VkRect2D                                  scissor                                 = makeRect2D(maxWidth, 1u);
2781                 const vk::VkDeviceSize                  imageResultSize                 = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2782                 Buffer                                                  imageBufferResult               (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2783                 const VkImageSubresourceRange   subresourceRange                =
2784                 {
2785                         VK_IMAGE_ASPECT_COLOR_BIT,                                                                                      //VkImageAspectFlags    aspectMask
2786                         0u,                                                                                                                                     //deUint32                              baseMipLevel
2787                         1u,                                                                                                                                     //deUint32                              levelCount
2788                         0u,                                                                                                                                     //deUint32                              baseArrayLayer
2789                         1u                                                                                                                                      //deUint32                              layerCount
2790                 };
2791
2792                 const VkImageMemoryBarrier              colorAttachmentBarrier  = makeImageMemoryBarrier(
2793                         (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
2794                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2795                         resultImage.getImage(), subresourceRange);
2796
2797                 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2798                 {
2799                         for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
2800                         {
2801                                 // re-init the data
2802                                 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2803                                 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
2804                         }
2805
2806                         totalIterations++;
2807
2808                         beginCommandBuffer(vk, *cmdBuffer);
2809
2810                         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
2811
2812                         vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2813
2814                         vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2815
2816                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2817
2818                         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2819
2820                         if (stagesCount + extraDatasCount > 0)
2821                                 vk.cmdBindDescriptorSets(*cmdBuffer,
2822                                                 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2823                                                 &descriptorSet.get(), 0u, DE_NULL);
2824
2825                         vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
2826
2827                         endRenderPass(vk, *cmdBuffer);
2828
2829                         copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2830
2831                         endCommandBuffer(vk, *cmdBuffer);
2832
2833                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2834
2835                         for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
2836                         {
2837                                 std::vector<const void*> datas;
2838                                 if (!inputBuffers[ndx]->isImage())
2839                                 {
2840                                         const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
2841                                         invalidateAlloc(vk, device, resultAlloc);
2842                                         // we always have our result data first
2843                                         datas.push_back(resultAlloc.getHostPtr());
2844                                 }
2845
2846                                 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2847                                 {
2848                                         const deUint32 datasNdx = index - stagesCount;
2849                                         if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
2850                                         {
2851                                                 const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
2852                                                 invalidateAlloc(vk, device, resultAlloc);
2853                                                 // we always have our result data first
2854                                                 datas.push_back(resultAlloc.getHostPtr());
2855                                         }
2856                                 }
2857
2858                                 if (!checkResult(datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
2859                                         failedIterations++;
2860                         }
2861                         if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
2862                         {
2863                                 std::vector<const void*> datas;
2864                                 const Allocation& resultAlloc = imageBufferResult.getAllocation();
2865                                 invalidateAlloc(vk, device, resultAlloc);
2866
2867                                 // we always have our result data first
2868                                 datas.push_back(resultAlloc.getHostPtr());
2869
2870                                 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2871                                 {
2872                                         const deUint32 datasNdx = index - stagesCount;
2873                                         if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
2874                                         {
2875                                                 const Allocation& alloc = inputBuffers[index]->getAllocation();
2876                                                 invalidateAlloc(vk, device, alloc);
2877                                                 // we always have our result data first
2878                                                 datas.push_back(alloc.getHostPtr());
2879                                         }
2880                                 }
2881
2882                                 if (!checkResult(datas, width, subgroupSize))
2883                                         failedIterations++;
2884                         }
2885
2886                         vk.resetCommandBuffer(*cmdBuffer, 0);
2887                 }
2888
2889                 if (0 < failedIterations)
2890                 {
2891                         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2892
2893                         context.getTestContext().getLog()
2894                                 << TestLog::Message << valuesPassed << " / "
2895                                 << totalIterations << " values passed" << TestLog::EndMessage;
2896
2897                         return tcu::TestStatus::fail("Failed!");
2898                 }
2899         }
2900
2901         return tcu::TestStatus::pass("OK");
2902 }
2903
2904 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
2905         SSBOData* extraData, deUint32 extraDataCount,
2906         bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2907 {
2908         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
2909         const VkDevice                                                  device                                  = context.getDevice();
2910         const VkQueue                                                   queue                                   = context.getUniversalQueue();
2911         const deUint32                                                  maxWidth                                = getMaxWidth();
2912         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
2913         vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
2914         DescriptorSetLayoutBuilder                              layoutBuilder;
2915         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2916         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2917         const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
2918
2919         const VkVertexInputBindingDescription   vertexInputBinding              =
2920         {
2921                 0u,                                                                                     // binding;
2922                 static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
2923                 VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
2924         };
2925
2926         const VkVertexInputAttributeDescription vertexInputAttribute    =
2927         {
2928                 0u,
2929                 0u,
2930                 VK_FORMAT_R32G32B32A32_SFLOAT,
2931                 0u
2932         };
2933
2934         for (deUint32 i = 0u; i < extraDataCount; i++)
2935         {
2936                 if (extraData[i].isImage)
2937                 {
2938                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2939                 }
2940                 else
2941                 {
2942                         vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2943                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2944                 }
2945                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2946                 initializeMemory(context, alloc, extraData[i]);
2947         }
2948
2949         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2950                 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
2951
2952         const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(vk, device));
2953
2954         const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(vk, device, *descriptorSetLayout));
2955
2956         const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
2957                                                                                                                                                 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
2958                                                                                                                                                 *vertexShaderModule, *fragmentShaderModule,
2959                                                                                                                                                 DE_NULL, DE_NULL, DE_NULL,
2960                                                                                                                                                 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
2961                                                                                                                                                 &vertexInputBinding, &vertexInputAttribute, true, format));
2962         DescriptorPoolBuilder                                   poolBuilder;
2963         DescriptorSetUpdateBuilder                              updateBuilder;
2964
2965
2966         for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2967                 poolBuilder.addType(inputBuffers[ndx]->getType());
2968
2969         Move <VkDescriptorPool>                                 descriptorPool;
2970         Move <VkDescriptorSet>                                  descriptorSet;
2971
2972         if (extraDataCount > 0)
2973         {
2974                 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2975                 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2976         }
2977
2978         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2979         {
2980                 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2981                 initializeMemory(context, alloc, extraData[ndx]);
2982         }
2983
2984         for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2985         {
2986                 if (inputBuffers[buffersNdx]->isImage())
2987                 {
2988                         VkDescriptorImageInfo info =
2989                                 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2990                                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2991
2992                         updateBuilder.writeSingle(*descriptorSet,
2993                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2994                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2995                 }
2996                 else
2997                 {
2998                         VkDescriptorBufferInfo info =
2999                                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
3000                                                                                 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3001
3002                         updateBuilder.writeSingle(*descriptorSet,
3003                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3004                                                                                 inputBuffers[buffersNdx]->getType(), &info);
3005                 }
3006         }
3007         updateBuilder.update(vk, device);
3008
3009         const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(vk, device, queueFamilyIndex));
3010
3011         const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
3012
3013         const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
3014
3015         const vk::VkDeviceSize                                  vertexBufferSize                = maxWidth * sizeof(tcu::Vec4);
3016         Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3017
3018         unsigned                                                                totalIterations                 = 0u;
3019         unsigned                                                                failedIterations                = 0u;
3020
3021         Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3022
3023         {
3024                 const Allocation&               alloc                           = vertexBuffer.getAllocation();
3025                 std::vector<tcu::Vec4>  data                            (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3026                 const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
3027                 float                                   leftHandPosition        = -1.0f;
3028
3029                 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3030                 {
3031                         data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3032                         leftHandPosition += pixelSize;
3033                 }
3034
3035                 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3036                 flushAlloc(vk, device, alloc);
3037         }
3038
3039         const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3040         const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
3041         const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
3042         const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3043         Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3044         const VkDeviceSize                      vertexBufferOffset      = 0u;
3045
3046         for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3047         {
3048                 totalIterations++;
3049
3050                 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3051                 {
3052                         const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3053                         initializeMemory(context, alloc, extraData[ndx]);
3054                 }
3055
3056                 beginCommandBuffer(vk, *cmdBuffer);
3057                 {
3058                         vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3059
3060                         vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3061
3062                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3063
3064                         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3065
3066                         if (extraDataCount > 0)
3067                         {
3068                                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3069                                         &descriptorSet.get(), 0u, DE_NULL);
3070                         }
3071
3072                         vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3073
3074                         vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3075
3076                         endRenderPass(vk, *cmdBuffer);
3077
3078                         copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3079
3080                         endCommandBuffer(vk, *cmdBuffer);
3081
3082                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3083                 }
3084
3085                 {
3086                         const Allocation& allocResult = imageBufferResult.getAllocation();
3087                         invalidateAlloc(vk, device, allocResult);
3088
3089                         std::vector<const void*> datas;
3090                         datas.push_back(allocResult.getHostPtr());
3091                         if (!checkResult(datas, width, subgroupSize))
3092                                 failedIterations++;
3093                 }
3094         }
3095
3096         if (0 < failedIterations)
3097         {
3098                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3099
3100                 context.getTestContext().getLog()
3101                         << TestLog::Message << valuesPassed << " / "
3102                         << totalIterations << " values passed" << TestLog::EndMessage;
3103
3104                 return tcu::TestStatus::fail("Failed!");
3105         }
3106
3107         return tcu::TestStatus::pass("OK");
3108 }
3109
3110
3111 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest     (Context& context, VkFormat format, SSBOData* extraDatas,
3112         deUint32 extraDatasCount,
3113         bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
3114                                                 deUint32 height, deUint32 subgroupSize))
3115 {
3116         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
3117         const VkDevice                                                  device                                  = context.getDevice();
3118         const VkQueue                                                   queue                                   = context.getUniversalQueue();
3119         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
3120         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule
3121                                                                                                                                                 (vk, device, context.getBinaryCollection().get("vert"), 0u));
3122         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule
3123                                                                                                                                                 (vk, device, context.getBinaryCollection().get("fragment"), 0u));
3124
3125         std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
3126
3127         for (deUint32 i = 0; i < extraDatasCount; i++)
3128         {
3129                 if (extraDatas[i].isImage)
3130                 {
3131                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3132                                                                                 static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3133                 }
3134                 else
3135                 {
3136                         vk::VkDeviceSize size =
3137                                 getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3138                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3139                 }
3140
3141                 const Allocation& alloc = inputBuffers[i]->getAllocation();
3142                 initializeMemory(context, alloc, extraDatas[i]);
3143         }
3144
3145         DescriptorSetLayoutBuilder layoutBuilder;
3146
3147         for (deUint32 i = 0; i < extraDatasCount; i++)
3148         {
3149                 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
3150                                                                  VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3151         }
3152
3153         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3154                 layoutBuilder.build(vk, device));
3155
3156         const Unique<VkPipelineLayout> pipelineLayout(
3157                 makePipelineLayout(vk, device, *descriptorSetLayout));
3158
3159         const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3160         const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3161                                                                           VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3162                                                                           *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3163                                                                           DE_NULL, DE_NULL, true));
3164
3165         DescriptorPoolBuilder poolBuilder;
3166
3167         // To stop validation complaining, always add at least one type to pool.
3168         poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3169         for (deUint32 i = 0; i < extraDatasCount; i++)
3170         {
3171                 poolBuilder.addType(inputBuffers[i]->getType());
3172         }
3173
3174         Move<VkDescriptorPool> descriptorPool;
3175         // Create descriptor set
3176         Move<VkDescriptorSet> descriptorSet;
3177
3178         if (extraDatasCount > 0)
3179         {
3180                 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3181
3182                 descriptorSet   = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3183         }
3184
3185         DescriptorSetUpdateBuilder updateBuilder;
3186
3187         for (deUint32 i = 0; i < extraDatasCount; i++)
3188         {
3189                 if (inputBuffers[i]->isImage())
3190                 {
3191                         VkDescriptorImageInfo info =
3192                                 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3193                                                                                 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3194
3195                         updateBuilder.writeSingle(*descriptorSet,
3196                                                                           DescriptorSetUpdateBuilder::Location::binding(i),
3197                                                                           inputBuffers[i]->getType(), &info);
3198                 }
3199                 else
3200                 {
3201                         VkDescriptorBufferInfo info =
3202                                 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
3203                                                                                  0ull, inputBuffers[i]->getAsBuffer()->getSize());
3204
3205                         updateBuilder.writeSingle(*descriptorSet,
3206                                                                           DescriptorSetUpdateBuilder::Location::binding(i),
3207                                                                           inputBuffers[i]->getType(), &info);
3208                 }
3209         }
3210
3211         if (extraDatasCount > 0)
3212                 updateBuilder.update(vk, device);
3213
3214         const Unique<VkCommandPool>             cmdPool                         (makeCommandPool(vk, device, queueFamilyIndex));
3215
3216         const deUint32                                  subgroupSize            = getSubgroupSize(context);
3217
3218         const Unique<VkCommandBuffer>   cmdBuffer                       (makeCommandBuffer(context, *cmdPool));
3219
3220         unsigned totalIterations = 0;
3221         unsigned failedIterations = 0;
3222
3223         for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3224         {
3225                 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3226                 {
3227                         totalIterations++;
3228
3229                         // re-init the data
3230                         for (deUint32 i = 0; i < extraDatasCount; i++)
3231                         {
3232                                 const Allocation& alloc = inputBuffers[i]->getAllocation();
3233                                 initializeMemory(context, alloc, extraDatas[i]);
3234                         }
3235
3236                         VkDeviceSize formatSize = getFormatSizeInBytes(format);
3237                         const VkDeviceSize resultImageSizeInBytes =
3238                                 width * height * formatSize;
3239
3240                         Image resultImage(context, width, height, format,
3241                                                           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
3242                                                           VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3243
3244                         Buffer resultBuffer(context, resultImageSizeInBytes,
3245                                                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3246
3247                         const Unique<VkFramebuffer> framebuffer(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3248
3249                         beginCommandBuffer(vk, *cmdBuffer);
3250
3251                         VkViewport viewport = makeViewport(width, height);
3252
3253                         vk.cmdSetViewport(
3254                                 *cmdBuffer, 0, 1, &viewport);
3255
3256                         VkRect2D scissor = {{0, 0}, {width, height}};
3257
3258                         vk.cmdSetScissor(
3259                                 *cmdBuffer, 0, 1, &scissor);
3260
3261                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3262
3263                         vk.cmdBindPipeline(
3264                                 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3265
3266                         if (extraDatasCount > 0)
3267                         {
3268                                 vk.cmdBindDescriptorSets(*cmdBuffer,
3269                                                 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3270                                                 &descriptorSet.get(), 0u, DE_NULL);
3271                         }
3272
3273                         vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3274
3275                         endRenderPass(vk, *cmdBuffer);
3276
3277                         copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3278
3279                         endCommandBuffer(vk, *cmdBuffer);
3280
3281                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3282
3283                         std::vector<const void*> datas;
3284                         {
3285                                 const Allocation& resultAlloc = resultBuffer.getAllocation();
3286                                 invalidateAlloc(vk, device, resultAlloc);
3287
3288                                 // we always have our result data first
3289                                 datas.push_back(resultAlloc.getHostPtr());
3290                         }
3291
3292                         if (!checkResult(datas, width, height, subgroupSize))
3293                         {
3294                                 failedIterations++;
3295                         }
3296
3297                         vk.resetCommandBuffer(*cmdBuffer, 0);
3298                 }
3299         }
3300
3301         if (0 < failedIterations)
3302         {
3303                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3304
3305                 context.getTestContext().getLog()
3306                         << TestLog::Message << valuesPassed << " / "
3307                         << totalIterations << " values passed" << TestLog::EndMessage;
3308
3309                 return tcu::TestStatus::fail("Failed!");
3310         }
3311
3312         return tcu::TestStatus::pass("OK");
3313 }
3314
3315 tcu::TestStatus vkt::subgroups::makeComputeTest(
3316         Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
3317         bool (*checkResult)(std::vector<const void*> datas,
3318                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
3319                                                 deUint32 subgroupSize))
3320 {
3321         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
3322         const VkDevice                                                  device                                  = context.getDevice();
3323         const VkQueue                                                   queue                                   = context.getUniversalQueue();
3324         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
3325         VkDeviceSize                                                    elementSize                             = getFormatSizeInBytes(format);
3326
3327         const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
3328                                                                                   maxSupportedSubgroupSize() *
3329                                                                                   maxSupportedSubgroupSize();
3330         const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
3331
3332         Buffer resultBuffer(
3333                 context, resultBufferSizeInBytes);
3334
3335         std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
3336
3337         for (deUint32 i = 0; i < inputsCount; i++)
3338         {
3339                 if (inputs[i].isImage)
3340                 {
3341                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3342                                                                                 static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
3343                 }
3344                 else
3345                 {
3346                         vk::VkDeviceSize size =
3347                                 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3348                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3349                 }
3350
3351                 const Allocation& alloc = inputBuffers[i]->getAllocation();
3352                 initializeMemory(context, alloc, inputs[i]);
3353         }
3354
3355         DescriptorSetLayoutBuilder layoutBuilder;
3356         layoutBuilder.addBinding(
3357                 resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3358
3359         for (deUint32 i = 0; i < inputsCount; i++)
3360         {
3361                 layoutBuilder.addBinding(
3362                         inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3363         }
3364
3365         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3366                 layoutBuilder.build(vk, device));
3367
3368         const Unique<VkShaderModule> shaderModule(
3369                 createShaderModule(vk, device,
3370                                                    context.getBinaryCollection().get("comp"), 0u));
3371         const Unique<VkPipelineLayout> pipelineLayout(
3372                 makePipelineLayout(vk, device, *descriptorSetLayout));
3373
3374         DescriptorPoolBuilder poolBuilder;
3375
3376         poolBuilder.addType(resultBuffer.getType());
3377
3378         for (deUint32 i = 0; i < inputsCount; i++)
3379         {
3380                 poolBuilder.addType(inputBuffers[i]->getType());
3381         }
3382
3383         const Unique<VkDescriptorPool> descriptorPool(
3384                 poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3385
3386         // Create descriptor set
3387         const Unique<VkDescriptorSet> descriptorSet(
3388                 makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
3389
3390         DescriptorSetUpdateBuilder updateBuilder;
3391
3392         const VkDescriptorBufferInfo resultDescriptorInfo =
3393                 makeDescriptorBufferInfo(
3394                         resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
3395
3396         updateBuilder.writeSingle(*descriptorSet,
3397                                                           DescriptorSetUpdateBuilder::Location::binding(0u),
3398                                                           VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
3399
3400         for (deUint32 i = 0; i < inputsCount; i++)
3401         {
3402                 if (inputBuffers[i]->isImage())
3403                 {
3404                         VkDescriptorImageInfo info =
3405                                 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3406                                                                                 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3407
3408                         updateBuilder.writeSingle(*descriptorSet,
3409                                                                           DescriptorSetUpdateBuilder::Location::binding(i + 1),
3410                                                                           inputBuffers[i]->getType(), &info);
3411                 }
3412                 else
3413                 {
3414                         vk::VkDeviceSize size =
3415                                 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3416                         VkDescriptorBufferInfo info =
3417                                 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
3418
3419                         updateBuilder.writeSingle(*descriptorSet,
3420                                                                           DescriptorSetUpdateBuilder::Location::binding(i + 1),
3421                                                                           inputBuffers[i]->getType(), &info);
3422                 }
3423         }
3424
3425         updateBuilder.update(vk, device);
3426
3427         const Unique<VkCommandPool>             cmdPool                         (makeCommandPool(vk, device, queueFamilyIndex));
3428
3429         unsigned totalIterations = 0;
3430         unsigned failedIterations = 0;
3431
3432         const deUint32 subgroupSize = getSubgroupSize(context);
3433
3434         const Unique<VkCommandBuffer> cmdBuffer(
3435                 makeCommandBuffer(context, *cmdPool));
3436
3437         const deUint32 numWorkgroups[3] = {4, 2, 2};
3438
3439         const deUint32 localSizesToTestCount = 8;
3440         deUint32 localSizesToTest[localSizesToTestCount][3] =
3441         {
3442                 {1, 1, 1},
3443                 {subgroupSize, 1, 1},
3444                 {1, subgroupSize, 1},
3445                 {1, 1, subgroupSize},
3446                 {32, 4, 1},
3447                 {1, 4, 32},
3448                 {3, 5, 7},
3449                 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
3450         };
3451
3452         Move<VkPipeline> pipelines[localSizesToTestCount - 1];
3453         pipelines[0] =
3454                 makeComputePipeline(context, *pipelineLayout, *shaderModule,
3455                                                         VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT, (VkPipeline) DE_NULL,
3456                                                         localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]);
3457
3458         for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
3459         {
3460                 const deUint32 nextX = localSizesToTest[index][0];
3461                 const deUint32 nextY = localSizesToTest[index][1];
3462                 const deUint32 nextZ = localSizesToTest[index][2];
3463
3464                 pipelines[index] =
3465                         makeComputePipeline(context, *pipelineLayout, *shaderModule,
3466                                                                 VK_PIPELINE_CREATE_DERIVATIVE_BIT, *pipelines[0],
3467                                                                 nextX, nextY, nextZ);
3468         }
3469
3470         for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
3471         {
3472
3473                 // we are running one test
3474                 totalIterations++;
3475
3476                 beginCommandBuffer(vk, *cmdBuffer);
3477
3478                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelines[index]);
3479
3480                 vk.cmdBindDescriptorSets(*cmdBuffer,
3481                                 VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
3482                                 &descriptorSet.get(), 0u, DE_NULL);
3483
3484                 vk.cmdDispatch(*cmdBuffer,numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
3485
3486                 endCommandBuffer(vk, *cmdBuffer);
3487
3488                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3489
3490                 std::vector<const void*> datas;
3491
3492                 {
3493                         const Allocation& resultAlloc = resultBuffer.getAllocation();
3494                         invalidateAlloc(vk, device, resultAlloc);
3495
3496                         // we always have our result data first
3497                         datas.push_back(resultAlloc.getHostPtr());
3498                 }
3499
3500                 for (deUint32 i = 0; i < inputsCount; i++)
3501                 {
3502                         if (!inputBuffers[i]->isImage())
3503                         {
3504                                 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
3505                                 invalidateAlloc(vk, device, resultAlloc);
3506
3507                                 // we always have our result data first
3508                                 datas.push_back(resultAlloc.getHostPtr());
3509                         }
3510                 }
3511
3512                 if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
3513                 {
3514                         failedIterations++;
3515                 }
3516
3517                 vk.resetCommandBuffer(*cmdBuffer, 0);
3518         }
3519
3520         if (0 < failedIterations)
3521         {
3522                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3523
3524                 context.getTestContext().getLog()
3525                         << TestLog::Message << valuesPassed << " / "
3526                         << totalIterations << " values passed" << TestLog::EndMessage;
3527
3528                 return tcu::TestStatus::fail("Failed!");
3529         }
3530
3531         return tcu::TestStatus::pass("OK");
3532 }