b11379949df6cf1b376b720d2a688a2f5dbf489f
[platform/upstream/VK-GL-CTS.git] / external / openglcts / modules / common / subgroups / glcSubgroupsTestsUtils.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017 The Khronos Group Inc.
6  * Copyright (c) 2017 Codeplay Software Ltd.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  */ /*!
21  * \file
22  * \brief Subgroups Tests Utils
23  */ /*--------------------------------------------------------------------*/
24
25 #include "vktSubgroupsTestsUtils.hpp"
26 #include "deRandom.hpp"
27 #include "tcuCommandLine.hpp"
28 #include "tcuStringTemplate.hpp"
29 #include "vkBarrierUtil.hpp"
30 #include "vkImageUtil.hpp"
31 #include "vkTypeUtil.hpp"
32 #include "vkCmdUtil.hpp"
33 #include "vkObjUtil.hpp"
34
35 using namespace tcu;
36 using namespace std;
37 using namespace vk;
38 using namespace vkt;
39
40 namespace
41 {
42 deUint32 getFormatSizeInBytes(const VkFormat format)
43 {
44         switch (format)
45         {
46                 default:
47                         DE_FATAL("Unhandled format!");
48                         return 0;
49                 case VK_FORMAT_R32_SINT:
50                 case VK_FORMAT_R32_UINT:
51                         return sizeof(deInt32);
52                 case VK_FORMAT_R32G32_SINT:
53                 case VK_FORMAT_R32G32_UINT:
54                         return static_cast<deUint32>(sizeof(deInt32) * 2);
55                 case VK_FORMAT_R32G32B32_SINT:
56                 case VK_FORMAT_R32G32B32_UINT:
57                 case VK_FORMAT_R32G32B32A32_SINT:
58                 case VK_FORMAT_R32G32B32A32_UINT:
59                         return static_cast<deUint32>(sizeof(deInt32) * 4);
60                 case VK_FORMAT_R32_SFLOAT:
61                         return 4;
62                 case VK_FORMAT_R32G32_SFLOAT:
63                         return 8;
64                 case VK_FORMAT_R32G32B32_SFLOAT:
65                         return 16;
66                 case VK_FORMAT_R32G32B32A32_SFLOAT:
67                         return 16;
68                 case VK_FORMAT_R64_SFLOAT:
69                         return 8;
70                 case VK_FORMAT_R64G64_SFLOAT:
71                         return 16;
72                 case VK_FORMAT_R64G64B64_SFLOAT:
73                         return 32;
74                 case VK_FORMAT_R64G64B64A64_SFLOAT:
75                         return 32;
76                 // The below formats are used to represent bool and bvec* types. These
77                 // types are passed to the shader as int and ivec* types, before the
78                 // calculations are done as booleans. We need a distinct type here so
79                 // that the shader generators can switch on it and generate the correct
80                 // shader source for testing.
81                 case VK_FORMAT_R8_USCALED:
82                         return sizeof(deInt32);
83                 case VK_FORMAT_R8G8_USCALED:
84                         return static_cast<deUint32>(sizeof(deInt32) * 2);
85                 case VK_FORMAT_R8G8B8_USCALED:
86                 case VK_FORMAT_R8G8B8A8_USCALED:
87                         return static_cast<deUint32>(sizeof(deInt32) * 4);
88         }
89 }
90
91 Move<VkPipelineLayout> makePipelineLayout(
92         Context& context, const VkDescriptorSetLayout descriptorSetLayout)
93 {
94         const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams = {
95                 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
96                 DE_NULL,                          // const void*            pNext;
97                 0u,                                       // VkPipelineLayoutCreateFlags    flags;
98                 1u,                                       // deUint32             setLayoutCount;
99                 &descriptorSetLayout, // const VkDescriptorSetLayout*   pSetLayouts;
100                 0u,                                       // deUint32             pushConstantRangeCount;
101                 DE_NULL, // const VkPushConstantRange*   pPushConstantRanges;
102         };
103         return createPipelineLayout(context.getDeviceInterface(),
104                                                                 context.getDevice(), &pipelineLayoutParams);
105 }
106
107 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
108 {
109         VkAttachmentReference colorReference = {
110                 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
111         };
112
113         const VkSubpassDescription subpassDescription = {0u,
114                                                                                                          VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
115                                                                                                          DE_NULL, DE_NULL, 0, DE_NULL
116                                                                                                         };
117
118         const VkSubpassDependency subpassDependencies[2] = {
119                 {   VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
120                         VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
121                         VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
122                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
123                         VK_DEPENDENCY_BY_REGION_BIT
124                 },
125                 {   0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
126                         VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
127                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
128                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
129                         VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
130                 },
131         };
132
133         VkAttachmentDescription attachmentDescription = {0u, format,
134                                                                                                          VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
135                                                                                                          VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
136                                                                                                          VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
137                                                                                                          VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
138                                                                                                         };
139
140         const VkRenderPassCreateInfo renderPassCreateInfo = {
141                 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
142                 &attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
143         };
144
145         return createRenderPass(context.getDeviceInterface(), context.getDevice(),
146                                                         &renderPassCreateInfo);
147 }
148
149 Move<VkFramebuffer> makeFramebuffer(Context& context,
150                                                                         const VkRenderPass renderPass, const VkImageView imageView, deUint32 width,
151                                                                         deUint32 height)
152 {
153         const VkFramebufferCreateInfo framebufferCreateInfo = {
154                 VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, DE_NULL, 0u, renderPass, 1,
155                 &imageView, width, height, 1
156         };
157
158         return createFramebuffer(context.getDeviceInterface(), context.getDevice(),
159                                                          &framebufferCreateInfo);
160 }
161
162 Move<VkPipeline> makeGraphicsPipeline(Context&                                                                  context,
163                                                                           const VkPipelineLayout                                        pipelineLayout,
164                                                                           const VkShaderStageFlags                                      stages,
165                                                                           const VkShaderModule                                          vertexShaderModule,
166                                                                           const VkShaderModule                                          fragmentShaderModule,
167                                                                           const VkShaderModule                                          geometryShaderModule,
168                                                                           const VkShaderModule                                          tessellationControlModule,
169                                                                           const VkShaderModule                                          tessellationEvaluationModule,
170                                                                           const VkRenderPass                                            renderPass,
171                                                                           const VkPrimitiveTopology                                     topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
172                                                                           const VkVertexInputBindingDescription*        vertexInputBindingDescription = DE_NULL,
173                                                                           const VkVertexInputAttributeDescription*      vertexInputAttributeDescriptions = DE_NULL,
174                                                                           const bool                                                            frameBufferTests = false,
175                                                                           const vk::VkFormat                                            attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT)
176 {
177         std::vector<VkViewport> noViewports;
178         std::vector<VkRect2D>   noScissors;
179
180         const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
181         {
182                 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,      // VkStructureType                                                              sType;
183                 DE_NULL,                                                                                                        // const void*                                                                  pNext;
184                 0u,                                                                                                                     // VkPipelineVertexInputStateCreateFlags                flags;
185                 vertexInputBindingDescription == DE_NULL ? 0u : 1u,                     // deUint32                                                                             vertexBindingDescriptionCount;
186                 vertexInputBindingDescription,                                                          // const VkVertexInputBindingDescription*               pVertexBindingDescriptions;
187                 vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,          // deUint32                                                                             vertexAttributeDescriptionCount;
188                 vertexInputAttributeDescriptions,                                                       // const VkVertexInputAttributeDescription*             pVertexAttributeDescriptions;
189         };
190
191         const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
192         const VkColorComponentFlags colorComponent =
193                                                                                                 numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
194                                                                                                 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
195                                                                                                 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
196                                                                                                 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
197
198         const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
199         {
200                 VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
201                 VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
202                 colorComponent
203         };
204
205         const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
206         {
207                 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
208                 VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
209                 { 0.0f, 0.0f, 0.0f, 0.0f }
210         };
211
212         const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
213
214         return vk::makeGraphicsPipeline(context.getDeviceInterface(),   // const DeviceInterface&                        vk
215                                                                         context.getDevice(),                    // const VkDevice                                device
216                                                                         pipelineLayout,                                 // const VkPipelineLayout                        pipelineLayout
217                                                                         vertexShaderModule,                             // const VkShaderModule                          vertexShaderModule
218                                                                         tessellationControlModule,              // const VkShaderModule                          tessellationControlShaderModule
219                                                                         tessellationEvaluationModule,   // const VkShaderModule                          tessellationEvalShaderModule
220                                                                         geometryShaderModule,                   // const VkShaderModule                          geometryShaderModule
221                                                                         fragmentShaderModule,                   // const VkShaderModule                          fragmentShaderModule
222                                                                         renderPass,                                             // const VkRenderPass                            renderPass
223                                                                         noViewports,                                    // const std::vector<VkViewport>&                viewports
224                                                                         noScissors,                                             // const std::vector<VkRect2D>&                  scissors
225                                                                         topology,                                               // const VkPrimitiveTopology                     topology
226                                                                         0u,                                                             // const deUint32                                subpass
227                                                                         patchControlPoints,                             // const deUint32                                patchControlPoints
228                                                                         &vertexInputStateCreateInfo,    // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
229                                                                         DE_NULL,                                                // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
230                                                                         DE_NULL,                                                // const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
231                                                                         DE_NULL,                                                // const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
232                                                                         &colorBlendStateCreateInfo);    // const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
233 }
234
235 Move<VkPipeline> makeComputePipeline(Context& context,
236                                                                          const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
237                                                                          deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
238 {
239         const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
240
241         const vk::VkSpecializationMapEntry entries[3] =
242         {
243                 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
244                 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
245                 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
246         };
247
248         const vk::VkSpecializationInfo info =
249         {
250                 /* mapEntryCount = */ 3,
251                 /* pMapEntries   = */ entries,
252                 /* dataSize      = */ sizeof(localSize),
253                 /* pData         = */ localSize
254         };
255
256         const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
257         {
258                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    // VkStructureType                                      sType;
259                 DE_NULL,                                                                                                // const void*                                          pNext;
260                 0u,                                                                                                             // VkPipelineShaderStageCreateFlags     flags;
261                 VK_SHADER_STAGE_COMPUTE_BIT,                                                    // VkShaderStageFlagBits                        stage;
262                 shaderModule,                                                                                   // VkShaderModule                                       module;
263                 "main",                                                                                                 // const char*                                          pName;
264                 &info,                                                                                                  // const VkSpecializationInfo*          pSpecializationInfo;
265         };
266
267         const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
268         {
269                 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType      sType;
270                 DE_NULL,                                                                                // const void*                                          pNext;
271                 0u,                                                                                             // VkPipelineCreateFlags                        flags;
272                 pipelineShaderStageParams,                                              // VkPipelineShaderStageCreateInfo      stage;
273                 pipelineLayout,                                                                 // VkPipelineLayout                                     layout;
274                 DE_NULL,                                                                                // VkPipeline                                           basePipelineHandle;
275                 0,                                                                                              // deInt32                                                      basePipelineIndex;
276         };
277
278         return createComputePipeline(context.getDeviceInterface(),
279                                                                  context.getDevice(), DE_NULL, &pipelineCreateInfo);
280 }
281
282 Move<VkDescriptorSet> makeDescriptorSet(Context& context,
283                                                                                 const VkDescriptorPool descriptorPool,
284                                                                                 const VkDescriptorSetLayout setLayout)
285 {
286         const VkDescriptorSetAllocateInfo allocateParams =
287         {
288                 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType
289                 // sType;
290                 DE_NULL,                // const void*          pNext;
291                 descriptorPool, // VkDescriptorPool       descriptorPool;
292                 1u,                             // deUint32           setLayoutCount;
293                 &setLayout,             // const VkDescriptorSetLayout* pSetLayouts;
294         };
295         return allocateDescriptorSet(
296                            context.getDeviceInterface(), context.getDevice(), &allocateParams);
297 }
298
299 Move<VkCommandPool> makeCommandPool(Context& context)
300 {
301         const VkCommandPoolCreateInfo commandPoolParams =
302         {
303                 VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, // VkStructureType sType;
304                 DE_NULL,                                                                        // const void*        pNext;
305                 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, // VkCommandPoolCreateFlags
306                 // flags;
307                 context.getUniversalQueueFamilyIndex(), // deUint32 queueFamilyIndex;
308         };
309
310         return createCommandPool(
311                            context.getDeviceInterface(), context.getDevice(), &commandPoolParams);
312 }
313
314 Move<VkCommandBuffer> makeCommandBuffer(
315         Context& context, const VkCommandPool commandPool)
316 {
317         const VkCommandBufferAllocateInfo bufferAllocateParams =
318         {
319                 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType              sType;
320                 DE_NULL,                                                                                // const void*                  pNext;
321                 commandPool,                                                                    // VkCommandPool                commandPool;
322                 VK_COMMAND_BUFFER_LEVEL_PRIMARY,                                // VkCommandBufferLevel level;
323                 1u,                                                                                             // deUint32                             bufferCount;
324         };
325         return allocateCommandBuffer(context.getDeviceInterface(),
326                                                                  context.getDevice(), &bufferAllocateParams);
327 }
328
329 Move<VkFence> submitCommandBuffer(
330         Context& context, const VkCommandBuffer commandBuffer)
331 {
332         const VkFenceCreateInfo fenceParams =
333         {
334                 VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, // VkStructureType    sType;
335                 DE_NULL,                                                         // const void*      pNext;
336                 0u,                                                                      // VkFenceCreateFlags flags;
337         };
338
339         Move<VkFence> fence(createFence(
340                                                         context.getDeviceInterface(), context.getDevice(), &fenceParams));
341
342         const VkSubmitInfo submitInfo =
343         {
344                 VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType      sType;
345                 DE_NULL,                                           // const void*        pNext;
346                 0u,                                                        // deUint32         waitSemaphoreCount;
347                 DE_NULL,                                           // const VkSemaphore*   pWaitSemaphores;
348                 (const VkPipelineStageFlags*)DE_NULL,
349                 1u,                             // deUint32         commandBufferCount;
350                 &commandBuffer, // const VkCommandBuffer* pCommandBuffers;
351                 0u,                             // deUint32         signalSemaphoreCount;
352                 DE_NULL,                // const VkSemaphore*   pSignalSemaphores;
353         };
354
355         vk::VkResult result = (context.getDeviceInterface().queueSubmit(
356                                                            context.getUniversalQueue(), 1u, &submitInfo, *fence));
357         VK_CHECK(result);
358
359         return Move<VkFence>(fence);
360 }
361
362 void waitFence(Context& context, Move<VkFence> fence)
363 {
364         VK_CHECK(context.getDeviceInterface().waitForFences(
365                                  context.getDevice(), 1u, &fence.get(), DE_TRUE, ~0ull));
366 }
367
368 struct Buffer;
369 struct Image;
370
371 struct BufferOrImage
372 {
373         bool isImage() const
374         {
375                 return m_isImage;
376         }
377
378         Buffer* getAsBuffer()
379         {
380                 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
381                 return reinterpret_cast<Buffer* >(this);
382         }
383
384         Image* getAsImage()
385         {
386                 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
387                 return reinterpret_cast<Image*>(this);
388         }
389
390         virtual VkDescriptorType getType() const
391         {
392                 if (m_isImage)
393                 {
394                         return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
395                 }
396                 else
397                 {
398                         return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
399                 }
400         }
401
402         Allocation& getAllocation() const
403         {
404                 return *m_allocation;
405         }
406
407         virtual ~BufferOrImage() {}
408
409 protected:
410         explicit BufferOrImage(bool image) : m_isImage(image) {}
411
412         bool m_isImage;
413         de::details::MovePtr<Allocation> m_allocation;
414 };
415
416 struct Buffer : public BufferOrImage
417 {
418         explicit Buffer(
419                 Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
420                 : BufferOrImage         (false)
421                 , m_sizeInBytes         (sizeInBytes)
422                 , m_usage                       (usage)
423         {
424                 const vk::VkBufferCreateInfo bufferCreateInfo =
425                 {
426                         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
427                         DE_NULL,
428                         0u,
429                         sizeInBytes,
430                         m_usage,
431                         VK_SHARING_MODE_EXCLUSIVE,
432                         0u,
433                         DE_NULL,
434                 };
435                 m_buffer = createBuffer(context.getDeviceInterface(),
436                                                                 context.getDevice(), &bufferCreateInfo);
437                 vk::VkMemoryRequirements req = getBufferMemoryRequirements(
438                                                                                    context.getDeviceInterface(), context.getDevice(), *m_buffer);
439                 req.size *= 2;
440                 m_allocation = context.getDefaultAllocator().allocate(
441                                                    req, MemoryRequirement::HostVisible);
442                 VK_CHECK(context.getDeviceInterface().bindBufferMemory(
443                                          context.getDevice(), *m_buffer, m_allocation->getMemory(),
444                                          m_allocation->getOffset()));
445         }
446
447         virtual VkDescriptorType getType() const
448         {
449                 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
450                 {
451                         return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
452                 }
453                 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
454         }
455
456         VkBuffer getBuffer() const {
457                 return *m_buffer;
458         }
459
460         const VkBuffer* getBufferPtr() const {
461                 return &(*m_buffer);
462         }
463
464         VkDeviceSize getSize() const {
465                 return m_sizeInBytes;
466         }
467
468 private:
469         Move<VkBuffer>                          m_buffer;
470         VkDeviceSize                            m_sizeInBytes;
471         const VkBufferUsageFlags        m_usage;
472 };
473
474 struct Image : public BufferOrImage
475 {
476         explicit Image(Context& context, deUint32 width, deUint32 height,
477                                    VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
478                 : BufferOrImage(true)
479         {
480                 const VkImageCreateInfo imageCreateInfo =
481                 {
482                         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
483                         format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
484                         VK_IMAGE_TILING_OPTIMAL, usage,
485                         VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
486                         VK_IMAGE_LAYOUT_UNDEFINED
487                 };
488                 m_image = createImage(context.getDeviceInterface(), context.getDevice(),
489                                                           &imageCreateInfo);
490                 vk::VkMemoryRequirements req = getImageMemoryRequirements(
491                                                                                    context.getDeviceInterface(), context.getDevice(), *m_image);
492                 req.size *= 2;
493                 m_allocation =
494                         context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
495                 VK_CHECK(context.getDeviceInterface().bindImageMemory(
496                                          context.getDevice(), *m_image, m_allocation->getMemory(),
497                                          m_allocation->getOffset()));
498
499                 const VkComponentMapping componentMapping =
500                 {
501                         VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
502                         VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
503                 };
504
505                 const VkImageViewCreateInfo imageViewCreateInfo =
506                 {
507                         VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
508                         VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
509                         {
510                                 VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1,
511                         }
512                 };
513
514                 m_imageView = createImageView(context.getDeviceInterface(),
515                                                                           context.getDevice(), &imageViewCreateInfo);
516
517                 const struct VkSamplerCreateInfo samplerCreateInfo =
518                 {
519                         VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
520                         DE_NULL,
521                         0u,
522                         VK_FILTER_NEAREST,
523                         VK_FILTER_NEAREST,
524                         VK_SAMPLER_MIPMAP_MODE_NEAREST,
525                         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
526                         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
527                         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
528                         0.0f,
529                         VK_FALSE,
530                         1.0f,
531                         DE_FALSE,
532                         VK_COMPARE_OP_ALWAYS,
533                         0.0f,
534                         0.0f,
535                         VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
536                         VK_FALSE,
537                 };
538
539                 m_sampler = createSampler(context.getDeviceInterface(), context.getDevice(), &samplerCreateInfo);
540         }
541
542         VkImage getImage() const {
543                 return *m_image;
544         }
545
546         VkImageView getImageView() const {
547                 return *m_imageView;
548         }
549
550         VkSampler getSampler() const {
551                 return *m_sampler;
552         }
553
554 private:
555         Move<VkImage> m_image;
556         Move<VkImageView> m_imageView;
557         Move<VkSampler> m_sampler;
558 };
559 }
560
561 std::string vkt::subgroups::getSharedMemoryBallotHelper()
562 {
563         return  "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
564                         "uvec4 sharedMemoryBallot(bool vote)\n"
565                         "{\n"
566                         "  uint groupOffset = gl_SubgroupID;\n"
567                         "  // One invocation in the group 0's the whole group's data\n"
568                         "  if (subgroupElect())\n"
569                         "  {\n"
570                         "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
571                         "  }\n"
572                         "  subgroupMemoryBarrierShared();\n"
573                         "  if (vote)\n"
574                         "  {\n"
575                         "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
576                         "    const highp uint bitToSet = 1u << invocationId;\n"
577                         "    switch (gl_SubgroupInvocationID / 32)\n"
578                         "    {\n"
579                         "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
580                         "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
581                         "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
582                         "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
583                         "    }\n"
584                         "  }\n"
585                         "  subgroupMemoryBarrierShared();\n"
586                         "  return superSecretComputeShaderHelper[groupOffset];\n"
587                         "}\n";
588 }
589
590 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
591 {
592         VkPhysicalDeviceSubgroupProperties subgroupProperties;
593         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
594         subgroupProperties.pNext = DE_NULL;
595
596         VkPhysicalDeviceProperties2 properties;
597         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
598         properties.pNext = &subgroupProperties;
599
600         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
601
602         return subgroupProperties.subgroupSize;
603 }
604
605 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
606         return 128u;
607 }
608
609 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
610 {
611         switch (stage)
612         {
613                 default:
614                         DE_FATAL("Unhandled stage!");
615                         return "";
616                 case VK_SHADER_STAGE_COMPUTE_BIT:
617                         return "compute";
618                 case VK_SHADER_STAGE_FRAGMENT_BIT:
619                         return "fragment";
620                 case VK_SHADER_STAGE_VERTEX_BIT:
621                         return "vertex";
622                 case VK_SHADER_STAGE_GEOMETRY_BIT:
623                         return "geometry";
624                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
625                         return "tess_control";
626                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
627                         return "tess_eval";
628         }
629 }
630
631 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
632 {
633         switch (bit)
634         {
635                 default:
636                         DE_FATAL("Unknown subgroup feature category!");
637                         return "";
638                 case VK_SUBGROUP_FEATURE_BASIC_BIT:
639                         return "VK_SUBGROUP_FEATURE_BASIC_BIT";
640                 case VK_SUBGROUP_FEATURE_VOTE_BIT:
641                         return "VK_SUBGROUP_FEATURE_VOTE_BIT";
642                 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
643                         return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
644                 case VK_SUBGROUP_FEATURE_BALLOT_BIT:
645                         return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
646                 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
647                         return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
648                 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
649                         return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
650                 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
651                         return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
652                 case VK_SUBGROUP_FEATURE_QUAD_BIT:
653                         return "VK_SUBGROUP_FEATURE_QUAD_BIT";
654         }
655 }
656
657 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
658 {
659         {
660         /*
661                 "#version 450\n"
662                 "void main (void)\n"
663                 "{\n"
664                 "  float pixelSize = 2.0f/1024.0f;\n"
665                 "   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
666                 "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
667                 "  gl_PointSize = 1.0f;\n"
668                 "}\n"
669         */
670                 const std::string vertNoSubgroup =
671                         "; SPIR-V\n"
672                         "; Version: 1.3\n"
673                         "; Generator: Khronos Glslang Reference Front End; 1\n"
674                         "; Bound: 37\n"
675                         "; Schema: 0\n"
676                         "OpCapability Shader\n"
677                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
678                         "OpMemoryModel Logical GLSL450\n"
679                         "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
680                         "OpMemberDecorate %20 0 BuiltIn Position\n"
681                         "OpMemberDecorate %20 1 BuiltIn PointSize\n"
682                         "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
683                         "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
684                         "OpDecorate %20 Block\n"
685                         "OpDecorate %26 BuiltIn VertexIndex\n"
686                         "%2 = OpTypeVoid\n"
687                         "%3 = OpTypeFunction %2\n"
688                         "%6 = OpTypeFloat 32\n"
689                         "%7 = OpTypePointer Function %6\n"
690                         "%9 = OpConstant %6 0.00195313\n"
691                         "%12 = OpConstant %6 2\n"
692                         "%14 = OpConstant %6 1\n"
693                         "%16 = OpTypeVector %6 4\n"
694                         "%17 = OpTypeInt 32 0\n"
695                         "%18 = OpConstant %17 1\n"
696                         "%19 = OpTypeArray %6 %18\n"
697                         "%20 = OpTypeStruct %16 %6 %19 %19\n"
698                         "%21 = OpTypePointer Output %20\n"
699                         "%22 = OpVariable %21 Output\n"
700                         "%23 = OpTypeInt 32 1\n"
701                         "%24 = OpConstant %23 0\n"
702                         "%25 = OpTypePointer Input %23\n"
703                         "%26 = OpVariable %25 Input\n"
704                         "%33 = OpConstant %6 0\n"
705                         "%35 = OpTypePointer Output %16\n"
706                         "%37 = OpConstant %23 1\n"
707                         "%38 = OpTypePointer Output %6\n"
708                         "%4 = OpFunction %2 None %3\n"
709                         "%5 = OpLabel\n"
710                         "%8 = OpVariable %7 Function\n"
711                         "%10 = OpVariable %7 Function\n"
712                         "OpStore %8 %9\n"
713                         "%11 = OpLoad %6 %8\n"
714                         "%13 = OpFDiv %6 %11 %12\n"
715                         "%15 = OpFSub %6 %13 %14\n"
716                         "OpStore %10 %15\n"
717                         "%27 = OpLoad %23 %26\n"
718                         "%28 = OpConvertSToF %6 %27\n"
719                         "%29 = OpLoad %6 %8\n"
720                         "%30 = OpFMul %6 %28 %29\n"
721                         "%31 = OpLoad %6 %10\n"
722                         "%32 = OpFAdd %6 %30 %31\n"
723                         "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
724                         "%36 = OpAccessChain %35 %22 %24\n"
725                         "OpStore %36 %34\n"
726                         "%39 = OpAccessChain %38 %22 %37\n"
727                         "OpStore %39 %14\n"
728                         "OpReturn\n"
729                         "OpFunctionEnd\n";
730                 programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
731         }
732
733         {
734         /*
735                 "#version 450\n"
736                 "layout(vertices=1) out;\n"
737                 "\n"
738                 "void main (void)\n"
739                 "{\n"
740                 "  if (gl_InvocationID == 0)\n"
741                 "  {\n"
742                 "    gl_TessLevelOuter[0] = 1.0f;\n"
743                 "    gl_TessLevelOuter[1] = 1.0f;\n"
744                 "  }\n"
745                 "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
746                 "}\n"
747         */
748                 const std::string tescNoSubgroup =
749                         "; SPIR-V\n"
750                         "; Version: 1.3\n"
751                         "; Generator: Khronos Glslang Reference Front End; 1\n"
752                         "; Bound: 45\n"
753                         "; Schema: 0\n"
754                         "OpCapability Tessellation\n"
755                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
756                         "OpMemoryModel Logical GLSL450\n"
757                         "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
758                         "OpExecutionMode %4 OutputVertices 1\n"
759                         "OpDecorate %8 BuiltIn InvocationId\n"
760                         "OpDecorate %20 Patch\n"
761                         "OpDecorate %20 BuiltIn TessLevelOuter\n"
762                         "OpMemberDecorate %29 0 BuiltIn Position\n"
763                         "OpMemberDecorate %29 1 BuiltIn PointSize\n"
764                         "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
765                         "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
766                         "OpDecorate %29 Block\n"
767                         "OpMemberDecorate %34 0 BuiltIn Position\n"
768                         "OpMemberDecorate %34 1 BuiltIn PointSize\n"
769                         "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
770                         "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
771                         "OpDecorate %34 Block\n"
772                         "%2 = OpTypeVoid\n"
773                         "%3 = OpTypeFunction %2\n"
774                         "%6 = OpTypeInt 32 1\n"
775                         "%7 = OpTypePointer Input %6\n"
776                         "%8 = OpVariable %7 Input\n"
777                         "%10 = OpConstant %6 0\n"
778                         "%11 = OpTypeBool\n"
779                         "%15 = OpTypeFloat 32\n"
780                         "%16 = OpTypeInt 32 0\n"
781                         "%17 = OpConstant %16 4\n"
782                         "%18 = OpTypeArray %15 %17\n"
783                         "%19 = OpTypePointer Output %18\n"
784                         "%20 = OpVariable %19 Output\n"
785                         "%21 = OpConstant %15 1\n"
786                         "%22 = OpTypePointer Output %15\n"
787                         "%24 = OpConstant %6 1\n"
788                         "%26 = OpTypeVector %15 4\n"
789                         "%27 = OpConstant %16 1\n"
790                         "%28 = OpTypeArray %15 %27\n"
791                         "%29 = OpTypeStruct %26 %15 %28 %28\n"
792                         "%30 = OpTypeArray %29 %27\n"
793                         "%31 = OpTypePointer Output %30\n"
794                         "%32 = OpVariable %31 Output\n"
795                         "%34 = OpTypeStruct %26 %15 %28 %28\n"
796                         "%35 = OpConstant %16 32\n"
797                         "%36 = OpTypeArray %34 %35\n"
798                         "%37 = OpTypePointer Input %36\n"
799                         "%38 = OpVariable %37 Input\n"
800                         "%40 = OpTypePointer Input %26\n"
801                         "%43 = OpTypePointer Output %26\n"
802                         "%4 = OpFunction %2 None %3\n"
803                         "%5 = OpLabel\n"
804                         "%9 = OpLoad %6 %8\n"
805                         "%12 = OpIEqual %11 %9 %10\n"
806                         "OpSelectionMerge %14 None\n"
807                         "OpBranchConditional %12 %13 %14\n"
808                         "%13 = OpLabel\n"
809                         "%23 = OpAccessChain %22 %20 %10\n"
810                         "OpStore %23 %21\n"
811                         "%25 = OpAccessChain %22 %20 %24\n"
812                         "OpStore %25 %21\n"
813                         "OpBranch %14\n"
814                         "%14 = OpLabel\n"
815                         "%33 = OpLoad %6 %8\n"
816                         "%39 = OpLoad %6 %8\n"
817                         "%41 = OpAccessChain %40 %38 %39 %10\n"
818                         "%42 = OpLoad %26 %41\n"
819                         "%44 = OpAccessChain %43 %32 %33 %10\n"
820                         "OpStore %44 %42\n"
821                         "OpReturn\n"
822                         "OpFunctionEnd\n";
823                 programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
824         }
825
826         {
827         /*
828                 "#version 450\n"
829                 "layout(isolines) in;\n"
830                 "\n"
831                 "void main (void)\n"
832                 "{\n"
833                 "  float pixelSize = 2.0f/1024.0f;\n"
834                 "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
835                 "}\n";
836         */
837                 const std::string teseNoSubgroup =
838                         "; SPIR-V\n"
839                         "; Version: 1.3\n"
840                         "; Generator: Khronos Glslang Reference Front End; 2\n"
841                         "; Bound: 42\n"
842                         "; Schema: 0\n"
843                         "OpCapability Tessellation\n"
844                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
845                         "OpMemoryModel Logical GLSL450\n"
846                         "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
847                         "OpExecutionMode %4 Isolines\n"
848                         "OpExecutionMode %4 SpacingEqual\n"
849                         "OpExecutionMode %4 VertexOrderCcw\n"
850                         "OpMemberDecorate %14 0 BuiltIn Position\n"
851                         "OpMemberDecorate %14 1 BuiltIn PointSize\n"
852                         "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
853                         "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
854                         "OpDecorate %14 Block\n"
855                         "OpMemberDecorate %19 0 BuiltIn Position\n"
856                         "OpMemberDecorate %19 1 BuiltIn PointSize\n"
857                         "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
858                         "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
859                         "OpDecorate %19 Block\n"
860                         "OpDecorate %29 BuiltIn TessCoord\n"
861                         "%2 = OpTypeVoid\n"
862                         "%3 = OpTypeFunction %2\n"
863                         "%6 = OpTypeFloat 32\n"
864                         "%7 = OpTypePointer Function %6\n"
865                         "%9 = OpConstant %6 0.00195313\n"
866                         "%10 = OpTypeVector %6 4\n"
867                         "%11 = OpTypeInt 32 0\n"
868                         "%12 = OpConstant %11 1\n"
869                         "%13 = OpTypeArray %6 %12\n"
870                         "%14 = OpTypeStruct %10 %6 %13 %13\n"
871                         "%15 = OpTypePointer Output %14\n"
872                         "%16 = OpVariable %15 Output\n"
873                         "%17 = OpTypeInt 32 1\n"
874                         "%18 = OpConstant %17 0\n"
875                         "%19 = OpTypeStruct %10 %6 %13 %13\n"
876                         "%20 = OpConstant %11 32\n"
877                         "%21 = OpTypeArray %19 %20\n"
878                         "%22 = OpTypePointer Input %21\n"
879                         "%23 = OpVariable %22 Input\n"
880                         "%24 = OpTypePointer Input %10\n"
881                         "%27 = OpTypeVector %6 3\n"
882                         "%28 = OpTypePointer Input %27\n"
883                         "%29 = OpVariable %28 Input\n"
884                         "%30 = OpConstant %11 0\n"
885                         "%31 = OpTypePointer Input %6\n"
886                         "%36 = OpConstant %6 2\n"
887                         "%40 = OpTypePointer Output %10\n"
888                         "%4 = OpFunction %2 None %3\n"
889                         "%5 = OpLabel\n"
890                         "%8 = OpVariable %7 Function\n"
891                         "OpStore %8 %9\n"
892                         "%25 = OpAccessChain %24 %23 %18 %18\n"
893                         "%26 = OpLoad %10 %25\n"
894                         "%32 = OpAccessChain %31 %29 %30\n"
895                         "%33 = OpLoad %6 %32\n"
896                         "%34 = OpLoad %6 %8\n"
897                         "%35 = OpFMul %6 %33 %34\n"
898                         "%37 = OpFDiv %6 %35 %36\n"
899                         "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
900                         "%39 = OpFAdd %10 %26 %38\n"
901                         "%41 = OpAccessChain %40 %16 %18\n"
902                         "OpStore %41 %39\n"
903                         "OpReturn\n"
904                         "OpFunctionEnd\n";
905                 programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
906         }
907
908 }
909
910
911 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
912 {
913         switch (stage)
914         {
915                 default:
916                         DE_FATAL("Unhandled stage!");
917                         return "";
918                 case VK_SHADER_STAGE_FRAGMENT_BIT:
919                         return
920                                 "#version 450\n"
921                                 "void main (void)\n"
922                                 "{\n"
923                                 "  float pixelSize = 2.0f/1024.0f;\n"
924                                 "   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
925                                 "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
926                                 "}\n";
927                 case VK_SHADER_STAGE_GEOMETRY_BIT:
928                         return
929                                 "#version 450\n"
930                                 "void main (void)\n"
931                                 "{\n"
932                                 "}\n";
933                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
934                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
935                         return
936                                 "#version 450\n"
937                                 "void main (void)\n"
938                                 "{\n"
939                                 "}\n";
940         }
941 }
942
943 bool vkt::subgroups::isSubgroupSupported(Context& context)
944 {
945         return context.contextSupports(vk::ApiVersion(1, 1, 0));
946 }
947
948 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
949         Context& context, const VkShaderStageFlags stage)
950 {
951         VkPhysicalDeviceSubgroupProperties subgroupProperties;
952         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
953         subgroupProperties.pNext = DE_NULL;
954
955         VkPhysicalDeviceProperties2 properties;
956         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
957         properties.pNext = &subgroupProperties;
958
959         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
960
961         return (stage & subgroupProperties.supportedStages) ? true : false;
962 }
963
964 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
965         VkShaderStageFlags stage)
966 {
967         switch (stage)
968         {
969                 default:
970                         return false;
971                 case VK_SHADER_STAGE_COMPUTE_BIT:
972                         return true;
973         }
974 }
975
976 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
977         Context& context,
978         VkSubgroupFeatureFlagBits bit) {
979         VkPhysicalDeviceSubgroupProperties subgroupProperties;
980         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
981         subgroupProperties.pNext = DE_NULL;
982
983         VkPhysicalDeviceProperties2 properties;
984         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
985         properties.pNext = &subgroupProperties;
986
987         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
988
989         return (bit & subgroupProperties.supportedOperations) ? true : false;
990 }
991
992 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
993 {
994         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
995                                 context.getInstanceInterface(), context.getPhysicalDevice());
996         return features.fragmentStoresAndAtomics ? true : false;
997 }
998
999 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
1000 {
1001         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1002                                 context.getInstanceInterface(), context.getPhysicalDevice());
1003         return features.vertexPipelineStoresAndAtomics ? true : false;
1004 }
1005
1006 bool vkt::subgroups::isDoubleSupportedForDevice(Context& context)
1007 {
1008         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1009                                 context.getInstanceInterface(), context.getPhysicalDevice());
1010         return features.shaderFloat64 ? true : false;
1011 }
1012
1013 bool vkt::subgroups::isDoubleFormat(VkFormat format)
1014 {
1015         switch (format)
1016         {
1017                 default:
1018                         return false;
1019                 case VK_FORMAT_R64_SFLOAT:
1020                 case VK_FORMAT_R64G64_SFLOAT:
1021                 case VK_FORMAT_R64G64B64_SFLOAT:
1022                 case VK_FORMAT_R64G64B64A64_SFLOAT:
1023                         return true;
1024         }
1025 }
1026
1027 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1028 {
1029         switch (format)
1030         {
1031                 default:
1032                         DE_FATAL("Unhandled format!");
1033                         return "";
1034                 case VK_FORMAT_R32_SINT:
1035                         return "int";
1036                 case VK_FORMAT_R32G32_SINT:
1037                         return "ivec2";
1038                 case VK_FORMAT_R32G32B32_SINT:
1039                         return "ivec3";
1040                 case VK_FORMAT_R32G32B32A32_SINT:
1041                         return "ivec4";
1042                 case VK_FORMAT_R32_UINT:
1043                         return "uint";
1044                 case VK_FORMAT_R32G32_UINT:
1045                         return "uvec2";
1046                 case VK_FORMAT_R32G32B32_UINT:
1047                         return "uvec3";
1048                 case VK_FORMAT_R32G32B32A32_UINT:
1049                         return "uvec4";
1050                 case VK_FORMAT_R32_SFLOAT:
1051                         return "float";
1052                 case VK_FORMAT_R32G32_SFLOAT:
1053                         return "vec2";
1054                 case VK_FORMAT_R32G32B32_SFLOAT:
1055                         return "vec3";
1056                 case VK_FORMAT_R32G32B32A32_SFLOAT:
1057                         return "vec4";
1058                 case VK_FORMAT_R64_SFLOAT:
1059                         return "double";
1060                 case VK_FORMAT_R64G64_SFLOAT:
1061                         return "dvec2";
1062                 case VK_FORMAT_R64G64B64_SFLOAT:
1063                         return "dvec3";
1064                 case VK_FORMAT_R64G64B64A64_SFLOAT:
1065                         return "dvec4";
1066                 case VK_FORMAT_R8_USCALED:
1067                         return "bool";
1068                 case VK_FORMAT_R8G8_USCALED:
1069                         return "bvec2";
1070                 case VK_FORMAT_R8G8B8_USCALED:
1071                         return "bvec3";
1072                 case VK_FORMAT_R8G8B8A8_USCALED:
1073                         return "bvec4";
1074         }
1075 }
1076
1077 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
1078 {
1079         /*
1080                 "layout(location = 0) in highp vec4 in_position;\n"
1081                 "void main (void)\n"
1082                 "{\n"
1083                 "  gl_Position = in_position;\n"
1084                 "}\n";
1085         */
1086         programCollection.spirvAsmSources.add("vert") <<
1087                 "; SPIR-V\n"
1088                 "; Version: 1.3\n"
1089                 "; Generator: Khronos Glslang Reference Front End; 2\n"
1090                 "; Bound: 21\n"
1091                 "; Schema: 0\n"
1092                 "OpCapability Shader\n"
1093                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1094                 "OpMemoryModel Logical GLSL450\n"
1095                 "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
1096                 "OpMemberDecorate %11 0 BuiltIn Position\n"
1097                 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1098                 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1099                 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1100                 "OpDecorate %11 Block\n"
1101                 "OpDecorate %17 Location 0\n"
1102                 "%2 = OpTypeVoid\n"
1103                 "%3 = OpTypeFunction %2\n"
1104                 "%6 = OpTypeFloat 32\n"
1105                 "%7 = OpTypeVector %6 4\n"
1106                 "%8 = OpTypeInt 32 0\n"
1107                 "%9 = OpConstant %8 1\n"
1108                 "%10 = OpTypeArray %6 %9\n"
1109                 "%11 = OpTypeStruct %7 %6 %10 %10\n"
1110                 "%12 = OpTypePointer Output %11\n"
1111                 "%13 = OpVariable %12 Output\n"
1112                 "%14 = OpTypeInt 32 1\n"
1113                 "%15 = OpConstant %14 0\n"
1114                 "%16 = OpTypePointer Input %7\n"
1115                 "%17 = OpVariable %16 Input\n"
1116                 "%19 = OpTypePointer Output %7\n"
1117                 "%4 = OpFunction %2 None %3\n"
1118                 "%5 = OpLabel\n"
1119                 "%18 = OpLoad %7 %17\n"
1120                 "%20 = OpAccessChain %19 %13 %15\n"
1121                 "OpStore %20 %18\n"
1122                 "OpReturn\n"
1123                 "OpFunctionEnd\n";
1124 }
1125
1126 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
1127 {
1128         /*
1129                 "layout(location = 0) in float in_color;\n"
1130                 "layout(location = 0) out uint out_color;\n"
1131                 "void main()\n"
1132                 {\n"
1133                 "       out_color = uint(in_color);\n"
1134                 "}\n";
1135         */
1136         programCollection.spirvAsmSources.add("fragment") <<
1137                 "; SPIR-V\n"
1138                 "; Version: 1.3\n"
1139                 "; Generator: Khronos Glslang Reference Front End; 2\n"
1140                 "; Bound: 14\n"
1141                 "; Schema: 0\n"
1142                 "OpCapability Shader\n"
1143                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1144                 "OpMemoryModel Logical GLSL450\n"
1145                 "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
1146                 "OpExecutionMode %4 OriginUpperLeft\n"
1147                 "OpDecorate %8 Location 0\n"
1148                 "OpDecorate %11 Location 0\n"
1149                 "%2 = OpTypeVoid\n"
1150                 "%3 = OpTypeFunction %2\n"
1151                 "%6 = OpTypeInt 32 0\n"
1152                 "%7 = OpTypePointer Output %6\n"
1153                 "%8 = OpVariable %7 Output\n"
1154                 "%9 = OpTypeFloat 32\n"
1155                 "%10 = OpTypePointer Input %9\n"
1156                 "%11 = OpVariable %10 Input\n"
1157                 "%4 = OpFunction %2 None %3\n"
1158                 "%5 = OpLabel\n"
1159                 "%12 = OpLoad %9 %11\n"
1160                 "%13 = OpConvertFToU %6 %12\n"
1161                 "OpStore %8 %13\n"
1162                 "OpReturn\n"
1163                 "OpFunctionEnd\n";
1164 }
1165
1166 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
1167 {
1168         /*
1169                 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1170                 "#extension GL_EXT_tessellation_shader : require\n"
1171                 "layout(vertices = 2) out;\n"
1172                 "void main (void)\n"
1173                 "{\n"
1174                 "  if (gl_InvocationID == 0)\n"
1175                   {\n"
1176                 "    gl_TessLevelOuter[0] = 1.0f;\n"
1177                 "    gl_TessLevelOuter[1] = 1.0f;\n"
1178                 "  }\n"
1179                 "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1180                 "}\n";
1181         */
1182         programCollection.spirvAsmSources.add("tesc") <<
1183                 "; SPIR-V\n"
1184                 "; Version: 1.3\n"
1185                 "; Generator: Khronos Glslang Reference Front End; 2\n"
1186                 "; Bound: 46\n"
1187                 "; Schema: 0\n"
1188                 "OpCapability Tessellation\n"
1189                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1190                 "OpMemoryModel Logical GLSL450\n"
1191                 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
1192                 "OpExecutionMode %4 OutputVertices 2\n"
1193                 "OpDecorate %8 BuiltIn InvocationId\n"
1194                 "OpDecorate %20 Patch\n"
1195                 "OpDecorate %20 BuiltIn TessLevelOuter\n"
1196                 "OpMemberDecorate %29 0 BuiltIn Position\n"
1197                 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1198                 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1199                 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1200                 "OpDecorate %29 Block\n"
1201                 "OpMemberDecorate %35 0 BuiltIn Position\n"
1202                 "OpMemberDecorate %35 1 BuiltIn PointSize\n"
1203                 "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
1204                 "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
1205                 "OpDecorate %35 Block\n"
1206                 "%2 = OpTypeVoid\n"
1207                 "%3 = OpTypeFunction %2\n"
1208                 "%6 = OpTypeInt 32 1\n"
1209                 "%7 = OpTypePointer Input %6\n"
1210                 "%8 = OpVariable %7 Input\n"
1211                 "%10 = OpConstant %6 0\n"
1212                 "%11 = OpTypeBool\n"
1213                 "%15 = OpTypeFloat 32\n"
1214                 "%16 = OpTypeInt 32 0\n"
1215                 "%17 = OpConstant %16 4\n"
1216                 "%18 = OpTypeArray %15 %17\n"
1217                 "%19 = OpTypePointer Output %18\n"
1218                 "%20 = OpVariable %19 Output\n"
1219                 "%21 = OpConstant %15 1\n"
1220                 "%22 = OpTypePointer Output %15\n"
1221                 "%24 = OpConstant %6 1\n"
1222                 "%26 = OpTypeVector %15 4\n"
1223                 "%27 = OpConstant %16 1\n"
1224                 "%28 = OpTypeArray %15 %27\n"
1225                 "%29 = OpTypeStruct %26 %15 %28 %28\n"
1226                 "%30 = OpConstant %16 2\n"
1227                 "%31 = OpTypeArray %29 %30\n"
1228                 "%32 = OpTypePointer Output %31\n"
1229                 "%33 = OpVariable %32 Output\n"
1230                 "%35 = OpTypeStruct %26 %15 %28 %28\n"
1231                 "%36 = OpConstant %16 32\n"
1232                 "%37 = OpTypeArray %35 %36\n"
1233                 "%38 = OpTypePointer Input %37\n"
1234                 "%39 = OpVariable %38 Input\n"
1235                 "%41 = OpTypePointer Input %26\n"
1236                 "%44 = OpTypePointer Output %26\n"
1237                 "%4 = OpFunction %2 None %3\n"
1238                 "%5 = OpLabel\n"
1239                 "%9 = OpLoad %6 %8\n"
1240                 "%12 = OpIEqual %11 %9 %10\n"
1241                 "OpSelectionMerge %14 None\n"
1242                 "OpBranchConditional %12 %13 %14\n"
1243                 "%13 = OpLabel\n"
1244                 "%23 = OpAccessChain %22 %20 %10\n"
1245                 "OpStore %23 %21\n"
1246                 "%25 = OpAccessChain %22 %20 %24\n"
1247                 "OpStore %25 %21\n"
1248                 "OpBranch %14\n"
1249                 "%14 = OpLabel\n"
1250                 "%34 = OpLoad %6 %8\n"
1251                 "%40 = OpLoad %6 %8\n"
1252                 "%42 = OpAccessChain %41 %39 %40 %10\n"
1253                 "%43 = OpLoad %26 %42\n"
1254                 "%45 = OpAccessChain %44 %33 %34 %10\n"
1255                 "OpStore %45 %43\n"
1256                 "OpReturn\n"
1257                 "OpFunctionEnd\n";
1258 }
1259
1260 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
1261 {
1262         /*
1263                 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1264                 "#extension GL_EXT_tessellation_shader : require\n"
1265                 "layout(isolines, equal_spacing, ccw ) in;\n"
1266                 "layout(location = 0) in float in_color[];\n"
1267                 "layout(location = 0) out float out_color;\n"
1268                 "\n"
1269                 "void main (void)\n"
1270                 "{\n"
1271                 "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1272                 "  out_color = in_color[0];\n"
1273                 "}\n";
1274         */
1275         programCollection.spirvAsmSources.add("tese") <<
1276                 "; SPIR-V\n"
1277                 "; Version: 1.3\n"
1278                 "; Generator: Khronos Glslang Reference Front End; 2\n"
1279                 "; Bound: 45\n"
1280                 "; Schema: 0\n"
1281                 "OpCapability Tessellation\n"
1282                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1283                 "OpMemoryModel Logical GLSL450\n"
1284                 "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
1285                 "OpExecutionMode %4 Isolines\n"
1286                 "OpExecutionMode %4 SpacingEqual\n"
1287                 "OpExecutionMode %4 VertexOrderCcw\n"
1288                 "OpMemberDecorate %11 0 BuiltIn Position\n"
1289                 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1290                 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1291                 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1292                 "OpDecorate %11 Block\n"
1293                 "OpMemberDecorate %16 0 BuiltIn Position\n"
1294                 "OpMemberDecorate %16 1 BuiltIn PointSize\n"
1295                 "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
1296                 "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
1297                 "OpDecorate %16 Block\n"
1298                 "OpDecorate %29 BuiltIn TessCoord\n"
1299                 "OpDecorate %39 Location 0\n"
1300                 "OpDecorate %42 Location 0\n"
1301                 "%2 = OpTypeVoid\n"
1302                 "%3 = OpTypeFunction %2\n"
1303                 "%6 = OpTypeFloat 32\n"
1304                 "%7 = OpTypeVector %6 4\n"
1305                 "%8 = OpTypeInt 32 0\n"
1306                 "%9 = OpConstant %8 1\n"
1307                 "%10 = OpTypeArray %6 %9\n"
1308                 "%11 = OpTypeStruct %7 %6 %10 %10\n"
1309                 "%12 = OpTypePointer Output %11\n"
1310                 "%13 = OpVariable %12 Output\n"
1311                 "%14 = OpTypeInt 32 1\n"
1312                 "%15 = OpConstant %14 0\n"
1313                 "%16 = OpTypeStruct %7 %6 %10 %10\n"
1314                 "%17 = OpConstant %8 32\n"
1315                 "%18 = OpTypeArray %16 %17\n"
1316                 "%19 = OpTypePointer Input %18\n"
1317                 "%20 = OpVariable %19 Input\n"
1318                 "%21 = OpTypePointer Input %7\n"
1319                 "%24 = OpConstant %14 1\n"
1320                 "%27 = OpTypeVector %6 3\n"
1321                 "%28 = OpTypePointer Input %27\n"
1322                 "%29 = OpVariable %28 Input\n"
1323                 "%30 = OpConstant %8 0\n"
1324                 "%31 = OpTypePointer Input %6\n"
1325                 "%36 = OpTypePointer Output %7\n"
1326                 "%38 = OpTypePointer Output %6\n"
1327                 "%39 = OpVariable %38 Output\n"
1328                 "%40 = OpTypeArray %6 %17\n"
1329                 "%41 = OpTypePointer Input %40\n"
1330                 "%42 = OpVariable %41 Input\n"
1331                 "%4 = OpFunction %2 None %3\n"
1332                 "%5 = OpLabel\n"
1333                 "%22 = OpAccessChain %21 %20 %15 %15\n"
1334                 "%23 = OpLoad %7 %22\n"
1335                 "%25 = OpAccessChain %21 %20 %24 %15\n"
1336                 "%26 = OpLoad %7 %25\n"
1337                 "%32 = OpAccessChain %31 %29 %30\n"
1338                 "%33 = OpLoad %6 %32\n"
1339                 "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
1340                 "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
1341                 "%37 = OpAccessChain %36 %13 %15\n"
1342                 "OpStore %37 %35\n"
1343                 "%43 = OpAccessChain %31 %42 %15\n"
1344                 "%44 = OpLoad %6 %43\n"
1345                 "OpStore %39 %44\n"
1346                 "OpReturn\n"
1347                 "OpFunctionEnd\n";
1348 }
1349
1350 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options,  vk::GlslSourceCollection& collection)
1351 {
1352         tcu::StringTemplate geometryTemplate(glslTemplate);
1353
1354         map<string, string>             linesParams;
1355         linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
1356
1357         map<string, string>             pointsParams;
1358         pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
1359
1360         collection.add("geometry_lines")        << glu::GeometrySource(geometryTemplate.specialize(linesParams))        << options;
1361         collection.add("geometry_points")       << glu::GeometrySource(geometryTemplate.specialize(pointsParams))       << options;
1362 }
1363
1364 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
1365 {
1366         tcu::StringTemplate geometryTemplate(spirvTemplate);
1367
1368         map<string, string>             linesParams;
1369         linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
1370
1371         map<string, string>             pointsParams;
1372         pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
1373
1374         collection.add("geometry_lines")        << geometryTemplate.specialize(linesParams)             << options;
1375         collection.add("geometry_points")       << geometryTemplate.specialize(pointsParams)    << options;
1376 }
1377
1378 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
1379 {
1380         const vk::VkFormat format = data.format;
1381         const vk::VkDeviceSize size = getFormatSizeInBytes(format) * data.numElements;
1382         if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
1383         {
1384                 de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
1385
1386                 switch (format)
1387                 {
1388                         default:
1389                                 DE_FATAL("Illegal buffer format");
1390                                 break;
1391                         case VK_FORMAT_R8_USCALED:
1392                         case VK_FORMAT_R8G8_USCALED:
1393                         case VK_FORMAT_R8G8B8_USCALED:
1394                         case VK_FORMAT_R8G8B8A8_USCALED:
1395                         case VK_FORMAT_R32_SINT:
1396                         case VK_FORMAT_R32G32_SINT:
1397                         case VK_FORMAT_R32G32B32_SINT:
1398                         case VK_FORMAT_R32G32B32A32_SINT:
1399                         case VK_FORMAT_R32_UINT:
1400                         case VK_FORMAT_R32G32_UINT:
1401                         case VK_FORMAT_R32G32B32_UINT:
1402                         case VK_FORMAT_R32G32B32A32_UINT:
1403                         {
1404                                 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
1405
1406                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
1407                                 {
1408                                         ptr[k] = rnd.getUint32();
1409                                 }
1410                         }
1411                         break;
1412                         case VK_FORMAT_R32_SFLOAT:
1413                         case VK_FORMAT_R32G32_SFLOAT:
1414                         case VK_FORMAT_R32G32B32_SFLOAT:
1415                         case VK_FORMAT_R32G32B32A32_SFLOAT:
1416                         {
1417                                 float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
1418
1419                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
1420                                 {
1421                                         ptr[k] = rnd.getFloat();
1422                                 }
1423                         }
1424                         break;
1425                         case VK_FORMAT_R64_SFLOAT:
1426                         case VK_FORMAT_R64G64_SFLOAT:
1427                         case VK_FORMAT_R64G64B64_SFLOAT:
1428                         case VK_FORMAT_R64G64B64A64_SFLOAT:
1429                         {
1430                                 double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
1431
1432                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
1433                                 {
1434                                         ptr[k] = rnd.getDouble();
1435                                 }
1436                         }
1437                         break;
1438                 }
1439         }
1440         else if (subgroups::SSBOData::InitializeZero == data.initializeType)
1441         {
1442                 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
1443
1444                 for (vk::VkDeviceSize k = 0; k < size / 4; k++)
1445                 {
1446                         ptr[k] = 0;
1447                 }
1448         }
1449
1450         if (subgroups::SSBOData::InitializeNone != data.initializeType)
1451         {
1452                 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
1453         }
1454 }
1455
1456 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
1457 {
1458         switch(shaderStage)
1459         {
1460                 case VK_SHADER_STAGE_VERTEX_BIT:
1461                         return 0u;
1462                         break;
1463                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1464                         return 1u;
1465                         break;
1466                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1467                         return 2u;
1468                         break;
1469                 case VK_SHADER_STAGE_GEOMETRY_BIT:
1470                         return 3u;
1471                         break;
1472                 default:
1473                         DE_ASSERT(0);
1474                         return -1;
1475         }
1476         DE_ASSERT(0);
1477         return -1;
1478 }
1479
1480 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
1481         Context& context, VkFormat format, SSBOData* extraData,
1482         deUint32 extraDataCount,
1483         bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
1484         const VkShaderStageFlags shaderStage)
1485 {
1486         const deUint32                                                  maxWidth                                = 1024u;
1487         vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
1488         DescriptorSetLayoutBuilder                              layoutBuilder;
1489         DescriptorPoolBuilder                                   poolBuilder;
1490         DescriptorSetUpdateBuilder                              updateBuilder;
1491         Move <VkDescriptorPool>                                 descriptorPool;
1492         Move <VkDescriptorSet>                                  descriptorSet;
1493
1494         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1495                                                                                                                                                 context.getBinaryCollection().get("vert"), 0u));
1496         const Unique<VkShaderModule>                    teCtrlShaderModule              (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1497                                                                                                                                                 context.getBinaryCollection().get("tesc"), 0u));
1498         const Unique<VkShaderModule>                    teEvalShaderModule              (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1499                                                                                                                                                 context.getBinaryCollection().get("tese"), 0u));
1500         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1501                                                                                                                                         context.getBinaryCollection().get("fragment"), 0u));
1502         const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
1503
1504         const VkVertexInputBindingDescription   vertexInputBinding              =
1505         {
1506                 0u,                                                                                     // binding;
1507                 static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
1508                 VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
1509         };
1510
1511         const VkVertexInputAttributeDescription vertexInputAttribute    =
1512         {
1513                 0u,
1514                 0u,
1515                 VK_FORMAT_R32G32B32A32_SFLOAT,
1516                 0u
1517         };
1518
1519         for (deUint32 i = 0u; i < extraDataCount; i++)
1520         {
1521                 if (extraData[i].isImage)
1522                 {
1523                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
1524                 }
1525                 else
1526                 {
1527                         vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
1528                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
1529                 }
1530                 const Allocation& alloc = inputBuffers[i]->getAllocation();
1531                 initializeMemory(context, alloc, extraData[i]);
1532         }
1533
1534         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1535                 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
1536
1537         const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
1538
1539         const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(context, *descriptorSetLayout));
1540
1541         const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
1542                                                                                                                                         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
1543                                                                                                                                         VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
1544                                                                                                                                         *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
1545                                                                                                                                         *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
1546
1547         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1548                 poolBuilder.addType(inputBuffers[ndx]->getType());
1549
1550         if (extraDataCount > 0)
1551         {
1552                 descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
1553                                                         VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1554                 descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
1555         }
1556
1557         for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
1558         {
1559                 if (inputBuffers[buffersNdx]->isImage())
1560                 {
1561                         VkDescriptorImageInfo info =
1562                                 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
1563                                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
1564
1565                         updateBuilder.writeSingle(*descriptorSet,
1566                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1567                                                                                 inputBuffers[buffersNdx]->getType(), &info);
1568                 }
1569                 else
1570                 {
1571                         VkDescriptorBufferInfo info =
1572                                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
1573                                                                                 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
1574
1575                         updateBuilder.writeSingle(*descriptorSet,
1576                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1577                                                                                 inputBuffers[buffersNdx]->getType(), &info);
1578                 }
1579         }
1580
1581         updateBuilder.update(context.getDeviceInterface(), context.getDevice());
1582
1583         const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(context));
1584         const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
1585         const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
1586         const vk::VkDeviceSize                                  vertexBufferSize                = 2ull * maxWidth * sizeof(tcu::Vec4);
1587         Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
1588         unsigned                                                                totalIterations                 = 0u;
1589         unsigned                                                                failedIterations                = 0u;
1590         Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1591
1592         {
1593                 const Allocation&               alloc                           = vertexBuffer.getAllocation();
1594                 std::vector<tcu::Vec4>  data                            (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
1595                 const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
1596                 float                                   leftHandPosition        = -1.0f;
1597
1598                 for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
1599                 {
1600                         data[ndx][0] = leftHandPosition;
1601                         leftHandPosition += pixelSize;
1602                         data[ndx+1][0] = leftHandPosition;
1603                 }
1604
1605                 deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
1606                 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
1607         }
1608
1609         for (deUint32 width = 1u; width < maxWidth; ++width)
1610         {
1611                 const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
1612                 const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
1613                 const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
1614                 const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
1615                 Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1616                 const VkDeviceSize                      vertexBufferOffset      = 0u;
1617
1618                 totalIterations++;
1619
1620                 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1621                 {
1622
1623                         context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
1624                         context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
1625
1626                         beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
1627
1628                         context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
1629
1630                         if (extraDataCount > 0)
1631                         {
1632                                 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
1633                                         VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
1634                                         &descriptorSet.get(), 0u, DE_NULL);
1635                         }
1636
1637                         context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
1638                         context.getDeviceInterface().cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
1639
1640                         endRenderPass(context.getDeviceInterface(), *cmdBuffer);
1641
1642                         copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
1643                         endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1644
1645                         Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
1646                         waitFence(context, fence);
1647                 }
1648
1649                 {
1650                         const Allocation& allocResult = imageBufferResult.getAllocation();
1651                         invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
1652
1653                         std::vector<const void*> datas;
1654                         datas.push_back(allocResult.getHostPtr());
1655                         if (!checkResult(datas, width/2u, subgroupSize))
1656                                 failedIterations++;
1657                 }
1658         }
1659
1660         if (0 < failedIterations)
1661         {
1662                 context.getTestContext().getLog()
1663                                 << TestLog::Message << (totalIterations - failedIterations) << " / "
1664                                 << totalIterations << " values passed" << TestLog::EndMessage;
1665                 return tcu::TestStatus::fail("Failed!");
1666         }
1667
1668         return tcu::TestStatus::pass("OK");
1669 }
1670
1671 bool vkt::subgroups::check(std::vector<const void*> datas,
1672         deUint32 width, deUint32 ref)
1673 {
1674         const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
1675
1676         for (deUint32 n = 0; n < width; ++n)
1677         {
1678                 if (data[n] != ref)
1679                 {
1680                         return false;
1681                 }
1682         }
1683
1684         return true;
1685 }
1686
1687 bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
1688         const deUint32 numWorkgroups[3], const deUint32 localSize[3],
1689         deUint32 ref)
1690 {
1691         const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
1692         const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
1693         const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
1694
1695         return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
1696 }
1697
1698 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
1699         Context& context, VkFormat format, SSBOData* extraData,
1700         deUint32 extraDataCount,
1701         bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
1702 {
1703         const deUint32                                                  maxWidth                                = 1024u;
1704         vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
1705         DescriptorSetLayoutBuilder                              layoutBuilder;
1706         DescriptorPoolBuilder                                   poolBuilder;
1707         DescriptorSetUpdateBuilder                              updateBuilder;
1708         Move <VkDescriptorPool>                                 descriptorPool;
1709         Move <VkDescriptorSet>                                  descriptorSet;
1710
1711         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1712                                                                                                                                                 context.getBinaryCollection().get("vert"), 0u));
1713         const Unique<VkShaderModule>                    geometryShaderModule    (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1714                                                                                                                                                 context.getBinaryCollection().get("geometry"), 0u));
1715         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1716                                                                                                                                         context.getBinaryCollection().get("fragment"), 0u));
1717         const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
1718         const VkVertexInputBindingDescription   vertexInputBinding              =
1719         {
1720                 0u,                                                                                     // binding;
1721                 static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
1722                 VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
1723         };
1724
1725         const VkVertexInputAttributeDescription vertexInputAttribute    =
1726         {
1727                 0u,
1728                 0u,
1729                 VK_FORMAT_R32G32B32A32_SFLOAT,
1730                 0u
1731         };
1732
1733         for (deUint32 i = 0u; i < extraDataCount; i++)
1734         {
1735                 if (extraData[i].isImage)
1736                 {
1737                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
1738                 }
1739                 else
1740                 {
1741                         vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
1742                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
1743                 }
1744                 const Allocation& alloc = inputBuffers[i]->getAllocation();
1745                 initializeMemory(context, alloc, extraData[i]);
1746         }
1747
1748         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1749                 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
1750
1751         const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
1752
1753         const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(context, *descriptorSetLayout));
1754
1755         const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
1756                                                                                                                                         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
1757                                                                                                                                         *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
1758                                                                                                                                         *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
1759
1760         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1761                 poolBuilder.addType(inputBuffers[ndx]->getType());
1762
1763         if (extraDataCount > 0)
1764         {
1765                 descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
1766                                                         VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1767                 descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
1768         }
1769
1770         for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
1771         {
1772                 if (inputBuffers[buffersNdx]->isImage())
1773                 {
1774                         VkDescriptorImageInfo info =
1775                                 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
1776                                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
1777
1778                         updateBuilder.writeSingle(*descriptorSet,
1779                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1780                                                                                 inputBuffers[buffersNdx]->getType(), &info);
1781                 }
1782                 else
1783                 {
1784                         VkDescriptorBufferInfo info =
1785                                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
1786                                                                                 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
1787
1788                         updateBuilder.writeSingle(*descriptorSet,
1789                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1790                                                                                 inputBuffers[buffersNdx]->getType(), &info);
1791                 }
1792         }
1793
1794         updateBuilder.update(context.getDeviceInterface(), context.getDevice());
1795
1796         const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(context));
1797         const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
1798         const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
1799         const vk::VkDeviceSize                                  vertexBufferSize                = maxWidth * sizeof(tcu::Vec4);
1800         Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
1801         unsigned                                                                totalIterations                 = 0u;
1802         unsigned                                                                failedIterations                = 0u;
1803         Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1804
1805         {
1806                 const Allocation&               alloc                           = vertexBuffer.getAllocation();
1807                 std::vector<tcu::Vec4>  data                            (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
1808                 const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
1809                 float                                   leftHandPosition        = -1.0f;
1810
1811                 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
1812                 {
1813                         data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
1814                         leftHandPosition += pixelSize;
1815                 }
1816
1817                 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
1818                 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
1819         }
1820
1821         for (deUint32 width = 1u; width < maxWidth; width++)
1822         {
1823                 totalIterations++;
1824                 const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
1825                 const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
1826                 const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
1827                 const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
1828                 Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1829                 const VkDeviceSize                      vertexBufferOffset      = 0u;
1830
1831                 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
1832                 {
1833                         const Allocation& alloc = inputBuffers[ndx]->getAllocation();
1834                         initializeMemory(context, alloc, extraData[ndx]);
1835                 }
1836
1837                 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1838                 {
1839                         context.getDeviceInterface().cmdSetViewport(
1840                                 *cmdBuffer, 0, 1, &viewport);
1841
1842                         context.getDeviceInterface().cmdSetScissor(
1843                                 *cmdBuffer, 0, 1, &scissor);
1844
1845                         beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
1846
1847                         context.getDeviceInterface().cmdBindPipeline(
1848                                 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
1849
1850                         if (extraDataCount > 0)
1851                         {
1852                                 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
1853                                         VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
1854                                         &descriptorSet.get(), 0u, DE_NULL);
1855                         }
1856
1857                         context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
1858
1859                         context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
1860
1861                         endRenderPass(context.getDeviceInterface(), *cmdBuffer);
1862
1863                         copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
1864
1865                         endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1866                         Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
1867                         waitFence(context, fence);
1868                 }
1869
1870                 {
1871                         const Allocation& allocResult = imageBufferResult.getAllocation();
1872                         invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
1873
1874                         std::vector<const void*> datas;
1875                         datas.push_back(allocResult.getHostPtr());
1876                         if (!checkResult(datas, width, subgroupSize))
1877                                 failedIterations++;
1878                 }
1879         }
1880
1881         if (0 < failedIterations)
1882         {
1883                 context.getTestContext().getLog()
1884                                 << TestLog::Message << (totalIterations - failedIterations) << " / "
1885                                 << totalIterations << " values passed" << TestLog::EndMessage;
1886                 return tcu::TestStatus::fail("Failed!");
1887         }
1888
1889         return tcu::TestStatus::pass("OK");
1890 }
1891
1892
1893 tcu::TestStatus vkt::subgroups::allStages(
1894         Context& context, VkFormat format, SSBOData* extraDatas,
1895         deUint32 extraDatasCount,
1896         bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
1897         const VkShaderStageFlags shaderStageTested)
1898 {
1899         const deUint32                                  maxWidth                        = 1024u;
1900         vector<VkShaderStageFlagBits>   stagesVector;
1901         VkShaderStageFlags                              shaderStageRequired     = (VkShaderStageFlags)0ull;
1902
1903         Move<VkShaderModule>                    vertexShaderModule;
1904         Move<VkShaderModule>                    teCtrlShaderModule;
1905         Move<VkShaderModule>                    teEvalShaderModule;
1906         Move<VkShaderModule>                    geometryShaderModule;
1907         Move<VkShaderModule>                    fragmentShaderModule;
1908
1909         if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
1910         {
1911                 stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
1912         }
1913         if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1914         {
1915                 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
1916                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
1917                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
1918         }
1919         if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1920         {
1921                 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
1922                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
1923                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
1924         }
1925         if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
1926         {
1927                 stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
1928                 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
1929                 shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
1930         }
1931         if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
1932         {
1933                 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
1934                 shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
1935         }
1936
1937         const deUint32  stagesCount     = static_cast<deUint32>(stagesVector.size());
1938         const string    vert            = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)                                    ? "vert_noSubgroup"             : "vert";
1939         const string    tesc            = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)              ? "tesc_noSubgroup"             : "tesc";
1940         const string    tese            = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)   ? "tese_noSubgroup"             : "tese";
1941
1942         shaderStageRequired = shaderStageTested | shaderStageRequired;
1943
1944         vertexShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(vert), 0u);
1945         if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1946         {
1947                 teCtrlShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tesc), 0u);
1948                 teEvalShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tese), 0u);
1949         }
1950         if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
1951         {
1952                 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1953                 {
1954                         // tessellation shaders output line primitives
1955                         geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_lines"), 0u);
1956                 }
1957                 else
1958                 {
1959                         // otherwise points are processed by geometry shader
1960                         geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_points"), 0u);
1961                 }
1962         }
1963         if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
1964                 fragmentShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u);
1965
1966         std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
1967
1968         DescriptorSetLayoutBuilder layoutBuilder;
1969         // The implicit result SSBO we use to store our outputs from the shader
1970         for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
1971         {
1972                 const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
1973                 const VkDeviceSize size = getFormatSizeInBytes(format) * shaderSize;
1974                 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
1975
1976                 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
1977         }
1978
1979         for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
1980         {
1981                 const deUint32 datasNdx = ndx - stagesCount;
1982                 if (extraDatas[datasNdx].isImage)
1983                 {
1984                         inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
1985                 }
1986                 else
1987                 {
1988                         const vk::VkDeviceSize size = getFormatSizeInBytes(extraDatas[datasNdx].format) * extraDatas[datasNdx].numElements;
1989                         inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
1990                 }
1991
1992                 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
1993                 initializeMemory(context, alloc, extraDatas[datasNdx]);
1994
1995                 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
1996                                                                 extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
1997         }
1998
1999         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2000                 layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2001
2002         const Unique<VkPipelineLayout> pipelineLayout(
2003                 makePipelineLayout(context, *descriptorSetLayout));
2004
2005         const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2006         const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
2007                                                                                 shaderStageRequired,
2008                                                                                 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
2009                                                                                 *renderPass,
2010                                                                                 (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
2011
2012         DescriptorPoolBuilder poolBuilder;
2013
2014         for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
2015         {
2016                 poolBuilder.addType(inputBuffers[ndx]->getType());
2017         }
2018
2019         const Unique<VkDescriptorPool> descriptorPool(
2020                 poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2021                                                   VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2022
2023         // Create descriptor set
2024         const Unique<VkDescriptorSet> descriptorSet(
2025                 makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
2026
2027         DescriptorSetUpdateBuilder updateBuilder;
2028
2029         for (deUint32 ndx = 0u; ndx < stagesCount; ndx++)
2030         {
2031                 if (inputBuffers[ndx]->isImage())
2032                 {
2033                         VkDescriptorImageInfo info =
2034                                 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
2035                                                                                 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2036
2037                         updateBuilder.writeSingle(*descriptorSet,
2038                                                                           DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])),
2039                                                                           inputBuffers[ndx]->getType(), &info);
2040                 }
2041                 else
2042                 {
2043                         VkDescriptorBufferInfo info =
2044                                 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
2045                                                                                  0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
2046
2047                         updateBuilder.writeSingle(*descriptorSet,
2048                                                                           DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])),
2049                                                                           inputBuffers[ndx]->getType(), &info);
2050                 }
2051         }
2052
2053         for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ndx++)
2054         {
2055                 if (inputBuffers[ndx]->isImage())
2056                 {
2057                         VkDescriptorImageInfo info =
2058                                 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
2059                                                                                 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2060
2061                         updateBuilder.writeSingle(*descriptorSet,
2062                                                                           DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx -stagesCount].binding),
2063                                                                           inputBuffers[ndx]->getType(), &info);
2064                 }
2065                 else
2066                 {
2067                         VkDescriptorBufferInfo info =
2068                                 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
2069                                                                                  0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
2070
2071                         updateBuilder.writeSingle(*descriptorSet,
2072                                                                           DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx - stagesCount].binding),
2073                                                                           inputBuffers[ndx]->getType(), &info);
2074                 }
2075         }
2076         updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2077
2078         {
2079                 const Unique<VkCommandPool>             cmdPool                                 (makeCommandPool(context));
2080                 const deUint32                                  subgroupSize                    = getSubgroupSize(context);
2081                 const Unique<VkCommandBuffer>   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
2082                 unsigned                                                totalIterations                 = 0u;
2083                 unsigned                                                failedIterations                = 0u;
2084                 Image                                                   resultImage                             (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2085                 const Unique<VkFramebuffer>             framebuffer                             (makeFramebuffer(context, *renderPass, resultImage.getImageView(), maxWidth, 1));
2086                 const VkViewport                                viewport                                = makeViewport(maxWidth, 1u);
2087                 const VkRect2D                                  scissor                                 = makeRect2D(maxWidth, 1u);
2088                 const vk::VkDeviceSize                  imageResultSize                 = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2089                 Buffer                                                  imageBufferResult               (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2090                 const VkImageSubresourceRange   subresourceRange                =
2091                 {
2092                         VK_IMAGE_ASPECT_COLOR_BIT,                                                                                      //VkImageAspectFlags    aspectMask
2093                         0u,                                                                                                                                     //deUint32                              baseMipLevel
2094                         1u,                                                                                                                                     //deUint32                              levelCount
2095                         0u,                                                                                                                                     //deUint32                              baseArrayLayer
2096                         1u                                                                                                                                      //deUint32                              layerCount
2097                 };
2098
2099                 const VkImageMemoryBarrier              colorAttachmentBarrier  = makeImageMemoryBarrier(
2100                         (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
2101                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2102                         resultImage.getImage(), subresourceRange);
2103
2104                 for (deUint32 width = 1u; width < maxWidth; width++)
2105                 {
2106                         for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
2107                         {
2108                                 // re-init the data
2109                                 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2110                                 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
2111                         }
2112
2113                         totalIterations++;
2114
2115                         beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2116
2117                         context.getDeviceInterface().cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
2118
2119                         context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2120
2121                         context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2122
2123                         beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2124
2125                         context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2126
2127                         context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2128                                         VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2129                                         &descriptorSet.get(), 0u, DE_NULL);
2130
2131                         context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1, 0, 0);
2132
2133                         endRenderPass(context.getDeviceInterface(), *cmdBuffer);
2134
2135                         copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2136
2137                         endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2138
2139                         Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2140                         waitFence(context, fence);
2141
2142                         for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
2143                         {
2144                                 std::vector<const void*> datas;
2145                                 if (!inputBuffers[ndx]->isImage())
2146                                 {
2147                                         const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
2148                                         invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2149                                         // we always have our result data first
2150                                         datas.push_back(resultAlloc.getHostPtr());
2151                                 }
2152
2153                                 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2154                                 {
2155                                         const deUint32 datasNdx = index - stagesCount;
2156                                         if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
2157                                         {
2158                                                 const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
2159                                                 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2160                                                 // we always have our result data first
2161                                                 datas.push_back(resultAlloc.getHostPtr());
2162                                         }
2163                                 }
2164
2165                                 if (!checkResult(datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
2166                                         failedIterations++;
2167                         }
2168                         if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
2169                         {
2170                                 std::vector<const void*> datas;
2171                                 const Allocation& resultAlloc = imageBufferResult.getAllocation();
2172                                 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2173
2174                                 // we always have our result data first
2175                                 datas.push_back(resultAlloc.getHostPtr());
2176
2177                                 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2178                                 {
2179                                         const deUint32 datasNdx = index - stagesCount;
2180                                         if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
2181                                         {
2182                                                 const Allocation& alloc = inputBuffers[index]->getAllocation();
2183                                                 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2184                                                 // we always have our result data first
2185                                                 datas.push_back(alloc.getHostPtr());
2186                                         }
2187                                 }
2188
2189                                 if (!checkResult(datas, width , subgroupSize))
2190                                         failedIterations++;
2191                         }
2192
2193                         context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
2194                 }
2195
2196                 if (0 < failedIterations)
2197                 {
2198                         context.getTestContext().getLog()
2199                                         << TestLog::Message << (totalIterations - failedIterations) << " / "
2200                                         << totalIterations << " values passed" << TestLog::EndMessage;
2201                         return tcu::TestStatus::fail("Failed!");
2202                 }
2203         }
2204
2205         return tcu::TestStatus::pass("OK");
2206 }
2207
2208 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
2209         SSBOData* extraData, deUint32 extraDataCount,
2210         bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2211 {
2212         const deUint32                                                  maxWidth                                = 1024u;
2213         vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
2214         DescriptorSetLayoutBuilder                              layoutBuilder;
2215         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule
2216                                                                                                                                                 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
2217         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule
2218                                                                                                                                                 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
2219         const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
2220
2221         const VkVertexInputBindingDescription   vertexInputBinding              =
2222         {
2223                 0u,                                                                                     // binding;
2224                 static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
2225                 VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
2226         };
2227
2228         const VkVertexInputAttributeDescription vertexInputAttribute    =
2229         {
2230                 0u,
2231                 0u,
2232                 VK_FORMAT_R32G32B32A32_SFLOAT,
2233                 0u
2234         };
2235
2236         for (deUint32 i = 0u; i < extraDataCount; i++)
2237         {
2238                 if (extraData[i].isImage)
2239                 {
2240                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2241                 }
2242                 else
2243                 {
2244                         vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
2245                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2246                 }
2247                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2248                 initializeMemory(context, alloc, extraData[i]);
2249         }
2250
2251         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2252                 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
2253
2254         const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2255
2256         const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(context, *descriptorSetLayout));
2257
2258         const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
2259                                                                                                                                                 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
2260                                                                                                                                                 *vertexShaderModule, *fragmentShaderModule,
2261                                                                                                                                                 DE_NULL, DE_NULL, DE_NULL,
2262                                                                                                                                                 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
2263                                                                                                                                                 &vertexInputBinding, &vertexInputAttribute, true, format));
2264         DescriptorPoolBuilder                                   poolBuilder;
2265         DescriptorSetUpdateBuilder                              updateBuilder;
2266
2267
2268         for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2269                 poolBuilder.addType(inputBuffers[ndx]->getType());
2270
2271         Move <VkDescriptorPool>                                 descriptorPool;
2272         Move <VkDescriptorSet>                                  descriptorSet;
2273
2274         if (extraDataCount > 0)
2275         {
2276                 descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2277                                                         VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2278                 descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
2279         }
2280
2281         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2282         {
2283                 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2284                 initializeMemory(context, alloc, extraData[ndx]);
2285         }
2286
2287         for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2288         {
2289                 if (inputBuffers[buffersNdx]->isImage())
2290                 {
2291                         VkDescriptorImageInfo info =
2292                                 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2293                                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2294
2295                         updateBuilder.writeSingle(*descriptorSet,
2296                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2297                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2298                 }
2299                 else
2300                 {
2301                         VkDescriptorBufferInfo info =
2302                                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2303                                                                                 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2304
2305                         updateBuilder.writeSingle(*descriptorSet,
2306                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2307                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2308                 }
2309         }
2310         updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2311
2312         const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(context));
2313
2314         const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
2315
2316         const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
2317
2318         const vk::VkDeviceSize                                  vertexBufferSize                = maxWidth * sizeof(tcu::Vec4);
2319         Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2320
2321         unsigned                                                                totalIterations                 = 0u;
2322         unsigned                                                                failedIterations                = 0u;
2323
2324         Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2325
2326         {
2327                 const Allocation&               alloc                           = vertexBuffer.getAllocation();
2328                 std::vector<tcu::Vec4>  data                            (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2329                 const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
2330                 float                                   leftHandPosition        = -1.0f;
2331
2332                 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2333                 {
2334                         data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2335                         leftHandPosition += pixelSize;
2336                 }
2337
2338                 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2339                 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2340         }
2341
2342         for (deUint32 width = 1u; width < maxWidth; width++)
2343         {
2344                 totalIterations++;
2345                 const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
2346                 const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
2347                 const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
2348                 const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2349                 Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2350                 const VkDeviceSize                      vertexBufferOffset      = 0u;
2351
2352                 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2353                 {
2354                         const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2355                         initializeMemory(context, alloc, extraData[ndx]);
2356                 }
2357
2358                 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2359                 {
2360                         context.getDeviceInterface().cmdSetViewport(
2361                                 *cmdBuffer, 0, 1, &viewport);
2362
2363                         context.getDeviceInterface().cmdSetScissor(
2364                                 *cmdBuffer, 0, 1, &scissor);
2365
2366                         beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2367
2368                         context.getDeviceInterface().cmdBindPipeline(
2369                                 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2370
2371                         if (extraDataCount > 0)
2372                         {
2373                                 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2374                                         VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2375                                         &descriptorSet.get(), 0u, DE_NULL);
2376                         }
2377
2378                         context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2379
2380                         context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2381
2382                         endRenderPass(context.getDeviceInterface(), *cmdBuffer);
2383
2384                         copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2385
2386                         endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2387                         Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2388                         waitFence(context, fence);
2389                 }
2390
2391                 {
2392                         const Allocation& allocResult = imageBufferResult.getAllocation();
2393                         invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
2394
2395                         std::vector<const void*> datas;
2396                         datas.push_back(allocResult.getHostPtr());
2397                         if (!checkResult(datas, width, subgroupSize))
2398                                 failedIterations++;
2399                 }
2400         }
2401
2402         if (0 < failedIterations)
2403         {
2404                 context.getTestContext().getLog()
2405                                 << TestLog::Message << (totalIterations - failedIterations) << " / "
2406                                 << totalIterations << " values passed" << TestLog::EndMessage;
2407                 return tcu::TestStatus::fail("Failed!");
2408         }
2409
2410         return tcu::TestStatus::pass("OK");
2411 }
2412
2413
2414 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest     (Context& context, VkFormat format, SSBOData* extraDatas,
2415         deUint32 extraDatasCount,
2416         bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
2417                                                 deUint32 height, deUint32 subgroupSize))
2418 {
2419         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule
2420                                                                                                                                                 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
2421         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule
2422                                                                                                                                                 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
2423
2424         std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
2425
2426         for (deUint32 i = 0; i < extraDatasCount; i++)
2427         {
2428                 if (extraDatas[i].isImage)
2429                 {
2430                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
2431                                                                                 static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
2432                 }
2433                 else
2434                 {
2435                         vk::VkDeviceSize size =
2436                                 getFormatSizeInBytes(extraDatas[i].format) * extraDatas[i].numElements;
2437                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2438                 }
2439
2440                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2441                 initializeMemory(context, alloc, extraDatas[i]);
2442         }
2443
2444         DescriptorSetLayoutBuilder layoutBuilder;
2445
2446         for (deUint32 i = 0; i < extraDatasCount; i++)
2447         {
2448                 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
2449                                                                  VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
2450         }
2451
2452         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2453                 layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2454
2455         const Unique<VkPipelineLayout> pipelineLayout(
2456                 makePipelineLayout(context, *descriptorSetLayout));
2457
2458         const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2459         const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
2460                                                                           VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
2461                                                                           *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
2462                                                                           DE_NULL, DE_NULL, true));
2463
2464         DescriptorPoolBuilder poolBuilder;
2465
2466         // To stop validation complaining, always add at least one type to pool.
2467         poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2468         for (deUint32 i = 0; i < extraDatasCount; i++)
2469         {
2470                 poolBuilder.addType(inputBuffers[i]->getType());
2471         }
2472
2473         Move<VkDescriptorPool> descriptorPool;
2474         // Create descriptor set
2475         Move<VkDescriptorSet> descriptorSet;
2476
2477         if (extraDatasCount > 0)
2478         {
2479                 descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2480                                                                                                         VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2481
2482                 descriptorSet   = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
2483         }
2484
2485         DescriptorSetUpdateBuilder updateBuilder;
2486
2487         for (deUint32 i = 0; i < extraDatasCount; i++)
2488         {
2489                 if (inputBuffers[i]->isImage())
2490                 {
2491                         VkDescriptorImageInfo info =
2492                                 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
2493                                                                                 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2494
2495                         updateBuilder.writeSingle(*descriptorSet,
2496                                                                           DescriptorSetUpdateBuilder::Location::binding(i),
2497                                                                           inputBuffers[i]->getType(), &info);
2498                 }
2499                 else
2500                 {
2501                         VkDescriptorBufferInfo info =
2502                                 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
2503                                                                                  0ull, inputBuffers[i]->getAsBuffer()->getSize());
2504
2505                         updateBuilder.writeSingle(*descriptorSet,
2506                                                                           DescriptorSetUpdateBuilder::Location::binding(i),
2507                                                                           inputBuffers[i]->getType(), &info);
2508                 }
2509         }
2510
2511         if (extraDatasCount > 0)
2512                 updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2513
2514         const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
2515
2516         const deUint32 subgroupSize = getSubgroupSize(context);
2517
2518         const Unique<VkCommandBuffer> cmdBuffer(
2519                 makeCommandBuffer(context, *cmdPool));
2520
2521         unsigned totalIterations = 0;
2522         unsigned failedIterations = 0;
2523
2524         for (deUint32 width = 8; width <= subgroupSize; width *= 2)
2525         {
2526                 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
2527                 {
2528                         totalIterations++;
2529
2530                         // re-init the data
2531                         for (deUint32 i = 0; i < extraDatasCount; i++)
2532                         {
2533                                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2534                                 initializeMemory(context, alloc, extraDatas[i]);
2535                         }
2536
2537                         VkDeviceSize formatSize = getFormatSizeInBytes(format);
2538                         const VkDeviceSize resultImageSizeInBytes =
2539                                 width * height * formatSize;
2540
2541                         Image resultImage(context, width, height, format,
2542                                                           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
2543                                                           VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2544
2545                         Buffer resultBuffer(context, resultImageSizeInBytes,
2546                                                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT);
2547
2548                         const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
2549                                                                                                         *renderPass, resultImage.getImageView(), width, height));
2550
2551                         beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2552
2553                         VkViewport viewport = makeViewport(width, height);
2554
2555                         context.getDeviceInterface().cmdSetViewport(
2556                                 *cmdBuffer, 0, 1, &viewport);
2557
2558                         VkRect2D scissor = {{0, 0}, {width, height}};
2559
2560                         context.getDeviceInterface().cmdSetScissor(
2561                                 *cmdBuffer, 0, 1, &scissor);
2562
2563                         beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
2564
2565                         context.getDeviceInterface().cmdBindPipeline(
2566                                 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2567
2568                         if (extraDatasCount > 0)
2569                         {
2570                                 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2571                                                 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2572                                                 &descriptorSet.get(), 0u, DE_NULL);
2573                         }
2574
2575                         context.getDeviceInterface().cmdDraw(*cmdBuffer, 4, 1, 0, 0);
2576
2577                         endRenderPass(context.getDeviceInterface(), *cmdBuffer);
2578
2579                         copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2580
2581                         endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2582
2583                         Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2584
2585                         waitFence(context, fence);
2586
2587                         std::vector<const void*> datas;
2588                         {
2589                                 const Allocation& resultAlloc = resultBuffer.getAllocation();
2590                                 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2591
2592                                 // we always have our result data first
2593                                 datas.push_back(resultAlloc.getHostPtr());
2594                         }
2595
2596                         if (!checkResult(datas, width, height, subgroupSize))
2597                         {
2598                                 failedIterations++;
2599                         }
2600
2601                         context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
2602                 }
2603         }
2604
2605         if (0 < failedIterations)
2606         {
2607                 context.getTestContext().getLog()
2608                                 << TestLog::Message << (totalIterations - failedIterations) << " / "
2609                                 << totalIterations << " values passed" << TestLog::EndMessage;
2610                 return tcu::TestStatus::fail("Failed!");
2611         }
2612
2613         return tcu::TestStatus::pass("OK");
2614 }
2615
2616 tcu::TestStatus vkt::subgroups::makeComputeTest(
2617         Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
2618         bool (*checkResult)(std::vector<const void*> datas,
2619                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
2620                                                 deUint32 subgroupSize))
2621 {
2622         VkDeviceSize elementSize = getFormatSizeInBytes(format);
2623
2624         const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
2625                                                                                   maxSupportedSubgroupSize() *
2626                                                                                   maxSupportedSubgroupSize();
2627         const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
2628
2629         Buffer resultBuffer(
2630                 context, resultBufferSizeInBytes);
2631
2632         std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
2633
2634         for (deUint32 i = 0; i < inputsCount; i++)
2635         {
2636                 if (inputs[i].isImage)
2637                 {
2638                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
2639                                                                                 static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
2640                 }
2641                 else
2642                 {
2643                         vk::VkDeviceSize size =
2644                                 getFormatSizeInBytes(inputs[i].format) * inputs[i].numElements;
2645                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2646                 }
2647
2648                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2649                 initializeMemory(context, alloc, inputs[i]);
2650         }
2651
2652         DescriptorSetLayoutBuilder layoutBuilder;
2653         layoutBuilder.addBinding(
2654                 resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
2655
2656         for (deUint32 i = 0; i < inputsCount; i++)
2657         {
2658                 layoutBuilder.addBinding(
2659                         inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
2660         }
2661
2662         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2663                 layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2664
2665         const Unique<VkShaderModule> shaderModule(
2666                 createShaderModule(context.getDeviceInterface(), context.getDevice(),
2667                                                    context.getBinaryCollection().get("comp"), 0u));
2668         const Unique<VkPipelineLayout> pipelineLayout(
2669                 makePipelineLayout(context, *descriptorSetLayout));
2670
2671         DescriptorPoolBuilder poolBuilder;
2672
2673         poolBuilder.addType(resultBuffer.getType());
2674
2675         for (deUint32 i = 0; i < inputsCount; i++)
2676         {
2677                 poolBuilder.addType(inputBuffers[i]->getType());
2678         }
2679
2680         const Unique<VkDescriptorPool> descriptorPool(
2681                 poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2682                                                   VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2683
2684         // Create descriptor set
2685         const Unique<VkDescriptorSet> descriptorSet(
2686                 makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
2687
2688         DescriptorSetUpdateBuilder updateBuilder;
2689
2690         const VkDescriptorBufferInfo resultDescriptorInfo =
2691                 makeDescriptorBufferInfo(
2692                         resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
2693
2694         updateBuilder.writeSingle(*descriptorSet,
2695                                                           DescriptorSetUpdateBuilder::Location::binding(0u),
2696                                                           VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
2697
2698         for (deUint32 i = 0; i < inputsCount; i++)
2699         {
2700                 if (inputBuffers[i]->isImage())
2701                 {
2702                         VkDescriptorImageInfo info =
2703                                 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
2704                                                                                 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2705
2706                         updateBuilder.writeSingle(*descriptorSet,
2707                                                                           DescriptorSetUpdateBuilder::Location::binding(i + 1),
2708                                                                           inputBuffers[i]->getType(), &info);
2709                 }
2710                 else
2711                 {
2712                         vk::VkDeviceSize size =
2713                                 getFormatSizeInBytes(inputs[i].format) * inputs[i].numElements;
2714                         VkDescriptorBufferInfo info =
2715                                 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
2716
2717                         updateBuilder.writeSingle(*descriptorSet,
2718                                                                           DescriptorSetUpdateBuilder::Location::binding(i + 1),
2719                                                                           inputBuffers[i]->getType(), &info);
2720                 }
2721         }
2722
2723         updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2724
2725         const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
2726
2727         unsigned totalIterations = 0;
2728         unsigned failedIterations = 0;
2729
2730         const deUint32 subgroupSize = getSubgroupSize(context);
2731
2732         const Unique<VkCommandBuffer> cmdBuffer(
2733                 makeCommandBuffer(context, *cmdPool));
2734
2735         const deUint32 numWorkgroups[3] = {4, 2, 2};
2736
2737         const deUint32 localSizesToTestCount = 15;
2738         deUint32 localSizesToTest[localSizesToTestCount][3] =
2739         {
2740                 {1, 1, 1},
2741                 {32, 4, 1},
2742                 {32, 1, 4},
2743                 {1, 32, 4},
2744                 {1, 4, 32},
2745                 {4, 1, 32},
2746                 {4, 32, 1},
2747                 {subgroupSize, 1, 1},
2748                 {1, subgroupSize, 1},
2749                 {1, 1, subgroupSize},
2750                 {3, 5, 7},
2751                 {128, 1, 1},
2752                 {1, 128, 1},
2753                 {1, 1, 64},
2754                 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
2755         };
2756
2757         Move<VkPipeline> lastPipeline(
2758                 makeComputePipeline(context, *pipelineLayout, *shaderModule,
2759                                                         localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]));
2760
2761         for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
2762         {
2763                 const deUint32 nextX = localSizesToTest[index + 1][0];
2764                 const deUint32 nextY = localSizesToTest[index + 1][1];
2765                 const deUint32 nextZ = localSizesToTest[index + 1][2];
2766
2767                 // we are running one test
2768                 totalIterations++;
2769
2770                 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2771
2772                 context.getDeviceInterface().cmdBindPipeline(
2773                         *cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *lastPipeline);
2774
2775                 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2776                                 VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
2777                                 &descriptorSet.get(), 0u, DE_NULL);
2778
2779                 context.getDeviceInterface().cmdDispatch(*cmdBuffer,
2780                                 numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
2781
2782                 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2783
2784                 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2785
2786                 Move<VkPipeline> nextPipeline(
2787                         makeComputePipeline(context, *pipelineLayout, *shaderModule,
2788                                                                 nextX, nextY, nextZ));
2789
2790                 waitFence(context, fence);
2791
2792                 std::vector<const void*> datas;
2793
2794                 {
2795                         const Allocation& resultAlloc = resultBuffer.getAllocation();
2796                         invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2797
2798                         // we always have our result data first
2799                         datas.push_back(resultAlloc.getHostPtr());
2800                 }
2801
2802                 for (deUint32 i = 0; i < inputsCount; i++)
2803                 {
2804                         if (!inputBuffers[i]->isImage())
2805                         {
2806                                 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
2807                                 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2808
2809                                 // non-image input buffers follow the result data, in input order
2810                                 datas.push_back(resultAlloc.getHostPtr());
2811                         }
2812                 }
2813
2814                 if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
2815                 {
2816                         failedIterations++;
2817                 }
2818
2819                 context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
2820
2821                 lastPipeline = nextPipeline;
2822         }
2823
2824         if (0 < failedIterations)
2825         {
2826                 context.getTestContext().getLog()
2827                                 << TestLog::Message << (totalIterations - failedIterations) << " / "
2828                                 << totalIterations << " values passed" << TestLog::EndMessage;
2829                 return tcu::TestStatus::fail("Failed!");
2830         }
2831
2832         return tcu::TestStatus::pass("OK");
2833 }