Merge vk-gl-cts/master into vk-gl-cts/vulkan-cts-next-dev
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / subgroups / vktSubgroupsTestsUtils.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Google Inc.
7  * Copyright (c) 2017 Codeplay Software Ltd.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests Utils
24  */ /*--------------------------------------------------------------------*/
25
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "deFloat16.h"
28 #include "deRandom.hpp"
29 #include "tcuCommandLine.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "vkBarrierUtil.hpp"
32 #include "vkImageUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkObjUtil.hpp"
36
37 using namespace tcu;
38 using namespace std;
39 using namespace vk;
40 using namespace vkt;
41
42 namespace
43 {
44
45 deUint32 getMaxWidth ()
46 {
47         return 1024u;
48 }
49
50 deUint32 getNextWidth (const deUint32 width)
51 {
52         if (width < 128)
53         {
54                 // This ensures we test every value up to 128 (the max subgroup size).
55                 return width + 1;
56         }
57         else
58         {
59                 // And once we hit 128 we increment to only power of 2's to reduce testing time.
60                 return width * 2;
61         }
62 }
63
64 deUint32 getFormatSizeInBytes(const VkFormat format)
65 {
66         switch (format)
67         {
68                 default:
69                         DE_FATAL("Unhandled format!");
70                         return 0;
71                 case VK_FORMAT_R8_SINT:
72                 case VK_FORMAT_R8_UINT:
73                         return static_cast<deUint32>(sizeof(deInt8));
74                 case VK_FORMAT_R8G8_SINT:
75                 case VK_FORMAT_R8G8_UINT:
76                         return static_cast<deUint32>(sizeof(deInt8) * 2);
77                 case VK_FORMAT_R8G8B8_SINT:
78                 case VK_FORMAT_R8G8B8_UINT:
79                 case VK_FORMAT_R8G8B8A8_SINT:
80                 case VK_FORMAT_R8G8B8A8_UINT:
81                         return static_cast<deUint32>(sizeof(deInt8) * 4);
82                 case VK_FORMAT_R16_SINT:
83                 case VK_FORMAT_R16_UINT:
84                 case VK_FORMAT_R16_SFLOAT:
85                         return static_cast<deUint32>(sizeof(deInt16));
86                 case VK_FORMAT_R16G16_SINT:
87                 case VK_FORMAT_R16G16_UINT:
88                 case VK_FORMAT_R16G16_SFLOAT:
89                         return static_cast<deUint32>(sizeof(deInt16) * 2);
90                 case VK_FORMAT_R16G16B16_UINT:
91                 case VK_FORMAT_R16G16B16_SINT:
92                 case VK_FORMAT_R16G16B16_SFLOAT:
93                 case VK_FORMAT_R16G16B16A16_SINT:
94                 case VK_FORMAT_R16G16B16A16_UINT:
95                 case VK_FORMAT_R16G16B16A16_SFLOAT:
96                         return static_cast<deUint32>(sizeof(deInt16) * 4);
97                 case VK_FORMAT_R32_SINT:
98                 case VK_FORMAT_R32_UINT:
99                 case VK_FORMAT_R32_SFLOAT:
100                         return static_cast<deUint32>(sizeof(deInt32));
101                 case VK_FORMAT_R32G32_SINT:
102                 case VK_FORMAT_R32G32_UINT:
103                 case VK_FORMAT_R32G32_SFLOAT:
104                         return static_cast<deUint32>(sizeof(deInt32) * 2);
105                 case VK_FORMAT_R32G32B32_SINT:
106                 case VK_FORMAT_R32G32B32_UINT:
107                 case VK_FORMAT_R32G32B32_SFLOAT:
108                 case VK_FORMAT_R32G32B32A32_SINT:
109                 case VK_FORMAT_R32G32B32A32_UINT:
110                 case VK_FORMAT_R32G32B32A32_SFLOAT:
111                         return static_cast<deUint32>(sizeof(deInt32) * 4);
112                 case VK_FORMAT_R64_SINT:
113                 case VK_FORMAT_R64_UINT:
114                 case VK_FORMAT_R64_SFLOAT:
115                         return static_cast<deUint32>(sizeof(deInt64));
116                 case VK_FORMAT_R64G64_SINT:
117                 case VK_FORMAT_R64G64_UINT:
118                 case VK_FORMAT_R64G64_SFLOAT:
119                         return static_cast<deUint32>(sizeof(deInt64) * 2);
120                 case VK_FORMAT_R64G64B64_SINT:
121                 case VK_FORMAT_R64G64B64_UINT:
122                 case VK_FORMAT_R64G64B64_SFLOAT:
123                 case VK_FORMAT_R64G64B64A64_SINT:
124                 case VK_FORMAT_R64G64B64A64_UINT:
125                 case VK_FORMAT_R64G64B64A64_SFLOAT:
126                         return static_cast<deUint32>(sizeof(deInt64) * 4);
127                 // The below formats are used to represent bool and bvec* types. These
128                 // types are passed to the shader as int and ivec* types, before the
129                 // calculations are done as booleans. We need a distinct type here so
130                 // that the shader generators can switch on it and generate the correct
131                 // shader source for testing.
132                 case VK_FORMAT_R8_USCALED:
133                         return static_cast<deUint32>(sizeof(deInt32));
134                 case VK_FORMAT_R8G8_USCALED:
135                         return static_cast<deUint32>(sizeof(deInt32) * 2);
136                 case VK_FORMAT_R8G8B8_USCALED:
137                 case VK_FORMAT_R8G8B8A8_USCALED:
138                         return static_cast<deUint32>(sizeof(deInt32) * 4);
139         }
140 }
141
142 deUint32 getElementSizeInBytes(
143         const VkFormat format,
144         const subgroups::SSBOData::InputDataLayoutType layout)
145 {
146         deUint32 bytes = getFormatSizeInBytes(format);
147         if (layout == subgroups::SSBOData::LayoutStd140)
148                 return bytes < 16 ? 16 : bytes;
149         else
150                 return bytes;
151 }
152
153 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
154 {
155         VkAttachmentReference colorReference = {
156                 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
157         };
158
159         const VkSubpassDescription subpassDescription = {0u,
160                                                                                                          VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
161                                                                                                          DE_NULL, DE_NULL, 0, DE_NULL
162                                                                                                         };
163
164         const VkSubpassDependency subpassDependencies[2] = {
165                 {   VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
166                         VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
167                         VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
168                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
169                         VK_DEPENDENCY_BY_REGION_BIT
170                 },
171                 {   0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
172                         VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
173                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
174                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
175                         VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
176                 },
177         };
178
179         VkAttachmentDescription attachmentDescription = {0u, format,
180                                                                                                          VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
181                                                                                                          VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
182                                                                                                          VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
183                                                                                                          VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
184                                                                                                         };
185
186         const VkRenderPassCreateInfo renderPassCreateInfo = {
187                 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
188                 &attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
189         };
190
191         return createRenderPass(context.getDeviceInterface(), context.getDevice(),
192                                                         &renderPassCreateInfo);
193 }
194
195 Move<VkPipeline> makeGraphicsPipeline(Context&                                                                  context,
196                                                                           const VkPipelineLayout                                        pipelineLayout,
197                                                                           const VkShaderStageFlags                                      stages,
198                                                                           const VkShaderModule                                          vertexShaderModule,
199                                                                           const VkShaderModule                                          fragmentShaderModule,
200                                                                           const VkShaderModule                                          geometryShaderModule,
201                                                                           const VkShaderModule                                          tessellationControlModule,
202                                                                           const VkShaderModule                                          tessellationEvaluationModule,
203                                                                           const VkRenderPass                                            renderPass,
204                                                                           const VkPrimitiveTopology                                     topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
205                                                                           const VkVertexInputBindingDescription*        vertexInputBindingDescription = DE_NULL,
206                                                                           const VkVertexInputAttributeDescription*      vertexInputAttributeDescriptions = DE_NULL,
207                                                                           const bool                                                            frameBufferTests = false,
208                                                                           const vk::VkFormat                                            attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT)
209 {
210         std::vector<VkViewport> noViewports;
211         std::vector<VkRect2D>   noScissors;
212
213         const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
214         {
215                 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,      // VkStructureType                                                              sType;
216                 DE_NULL,                                                                                                        // const void*                                                                  pNext;
217                 0u,                                                                                                                     // VkPipelineVertexInputStateCreateFlags                flags;
218                 vertexInputBindingDescription == DE_NULL ? 0u : 1u,                     // deUint32                                                                             vertexBindingDescriptionCount;
219                 vertexInputBindingDescription,                                                          // const VkVertexInputBindingDescription*               pVertexBindingDescriptions;
220                 vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,          // deUint32                                                                             vertexAttributeDescriptionCount;
221                 vertexInputAttributeDescriptions,                                                       // const VkVertexInputAttributeDescription*             pVertexAttributeDescriptions;
222         };
223
224         const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
225         const VkColorComponentFlags colorComponent =
226                                                                                                 numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
227                                                                                                 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
228                                                                                                 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
229                                                                                                 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
230
231         const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
232         {
233                 VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
234                 VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
235                 colorComponent
236         };
237
238         const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
239         {
240                 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
241                 VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
242                 { 0.0f, 0.0f, 0.0f, 0.0f }
243         };
244
245         const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
246
247         return vk::makeGraphicsPipeline(context.getDeviceInterface(),   // const DeviceInterface&                        vk
248                                                                         context.getDevice(),                    // const VkDevice                                device
249                                                                         pipelineLayout,                                 // const VkPipelineLayout                        pipelineLayout
250                                                                         vertexShaderModule,                             // const VkShaderModule                          vertexShaderModule
251                                                                         tessellationControlModule,              // const VkShaderModule                          tessellationControlShaderModule
252                                                                         tessellationEvaluationModule,   // const VkShaderModule                          tessellationEvalShaderModule
253                                                                         geometryShaderModule,                   // const VkShaderModule                          geometryShaderModule
254                                                                         fragmentShaderModule,                   // const VkShaderModule                          fragmentShaderModule
255                                                                         renderPass,                                             // const VkRenderPass                            renderPass
256                                                                         noViewports,                                    // const std::vector<VkViewport>&                viewports
257                                                                         noScissors,                                             // const std::vector<VkRect2D>&                  scissors
258                                                                         topology,                                               // const VkPrimitiveTopology                     topology
259                                                                         0u,                                                             // const deUint32                                subpass
260                                                                         patchControlPoints,                             // const deUint32                                patchControlPoints
261                                                                         &vertexInputStateCreateInfo,    // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
262                                                                         DE_NULL,                                                // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
263                                                                         DE_NULL,                                                // const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
264                                                                         DE_NULL,                                                // const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
265                                                                         &colorBlendStateCreateInfo);    // const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
266 }
267
268 Move<VkPipeline> makeComputePipeline(Context& context,
269                                                                          const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
270                                                                          deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
271 {
272         const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
273
274         const vk::VkSpecializationMapEntry entries[3] =
275         {
276                 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
277                 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
278                 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
279         };
280
281         const vk::VkSpecializationInfo info =
282         {
283                 /* mapEntryCount = */ 3,
284                 /* pMapEntries   = */ entries,
285                 /* dataSize      = */ sizeof(localSize),
286                 /* pData         = */ localSize
287         };
288
289         const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
290         {
291                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    // VkStructureType                                      sType;
292                 DE_NULL,                                                                                                // const void*                                          pNext;
293                 0u,                                                                                                             // VkPipelineShaderStageCreateFlags     flags;
294                 VK_SHADER_STAGE_COMPUTE_BIT,                                                    // VkShaderStageFlagBits                        stage;
295                 shaderModule,                                                                                   // VkShaderModule                                       module;
296                 "main",                                                                                                 // const char*                                          pName;
297                 &info,                                                                                                  // const VkSpecializationInfo*          pSpecializationInfo;
298         };
299
300         const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
301         {
302                 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType      sType;
303                 DE_NULL,                                                                                // const void*                                          pNext;
304                 0u,                                                                                             // VkPipelineCreateFlags                        flags;
305                 pipelineShaderStageParams,                                              // VkPipelineShaderStageCreateInfo      stage;
306                 pipelineLayout,                                                                 // VkPipelineLayout                                     layout;
307                 DE_NULL,                                                                                // VkPipeline                                           basePipelineHandle;
308                 0,                                                                                              // deInt32                                                      basePipelineIndex;
309         };
310
311         return createComputePipeline(context.getDeviceInterface(),
312                                                                  context.getDevice(), DE_NULL, &pipelineCreateInfo);
313 }
314
315 Move<VkCommandBuffer> makeCommandBuffer(
316         Context& context, const VkCommandPool commandPool)
317 {
318         const VkCommandBufferAllocateInfo bufferAllocateParams =
319         {
320                 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType              sType;
321                 DE_NULL,                                                                                // const void*                  pNext;
322                 commandPool,                                                                    // VkCommandPool                commandPool;
323                 VK_COMMAND_BUFFER_LEVEL_PRIMARY,                                // VkCommandBufferLevel level;
324                 1u,                                                                                             // deUint32                             bufferCount;
325         };
326         return allocateCommandBuffer(context.getDeviceInterface(),
327                                                                  context.getDevice(), &bufferAllocateParams);
328 }
329
330 struct Buffer;
331 struct Image;
332
333 struct BufferOrImage
334 {
335         bool isImage() const
336         {
337                 return m_isImage;
338         }
339
340         Buffer* getAsBuffer()
341         {
342                 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
343                 return reinterpret_cast<Buffer* >(this);
344         }
345
346         Image* getAsImage()
347         {
348                 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
349                 return reinterpret_cast<Image*>(this);
350         }
351
352         virtual VkDescriptorType getType() const
353         {
354                 if (m_isImage)
355                 {
356                         return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
357                 }
358                 else
359                 {
360                         return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
361                 }
362         }
363
364         Allocation& getAllocation() const
365         {
366                 return *m_allocation;
367         }
368
369         virtual ~BufferOrImage() {}
370
371 protected:
372         explicit BufferOrImage(bool image) : m_isImage(image) {}
373
374         bool m_isImage;
375         de::details::MovePtr<Allocation> m_allocation;
376 };
377
378 struct Buffer : public BufferOrImage
379 {
380         explicit Buffer(
381                 Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
382                 : BufferOrImage         (false)
383                 , m_sizeInBytes         (sizeInBytes)
384                 , m_usage                       (usage)
385         {
386                 const DeviceInterface&                  vkd                                     = context.getDeviceInterface();
387                 const VkDevice                                  device                          = context.getDevice();
388
389                 const vk::VkBufferCreateInfo    bufferCreateInfo        =
390                 {
391                         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
392                         DE_NULL,
393                         0u,
394                         m_sizeInBytes,
395                         m_usage,
396                         VK_SHARING_MODE_EXCLUSIVE,
397                         0u,
398                         DE_NULL,
399                 };
400                 m_buffer                = createBuffer(vkd, device, &bufferCreateInfo);
401
402                 VkMemoryRequirements                    req                                     = getBufferMemoryRequirements(vkd, device, *m_buffer);
403
404                 m_allocation    = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
405                 VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
406         }
407
408         virtual VkDescriptorType getType() const
409         {
410                 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
411                 {
412                         return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
413                 }
414                 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
415         }
416
417         VkBuffer getBuffer () const
418         {
419                 return *m_buffer;
420         }
421
422         const VkBuffer* getBufferPtr () const
423         {
424                 return &(*m_buffer);
425         }
426
427         VkDeviceSize getSize () const
428         {
429                 return m_sizeInBytes;
430         }
431
432 private:
433         Move<VkBuffer>                          m_buffer;
434         VkDeviceSize                            m_sizeInBytes;
435         const VkBufferUsageFlags        m_usage;
436 };
437
438 struct Image : public BufferOrImage
439 {
440         explicit Image(Context& context, deUint32 width, deUint32 height,
441                                    VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
442                 : BufferOrImage(true)
443         {
444                 const DeviceInterface&                  vk                                      = context.getDeviceInterface();
445                 const VkDevice                                  device                          = context.getDevice();
446                 const deUint32                                  queueFamilyIndex        = context.getUniversalQueueFamilyIndex();
447
448                 const VkImageCreateInfo                 imageCreateInfo         =
449                 {
450                         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
451                         format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
452                         VK_IMAGE_TILING_OPTIMAL, usage,
453                         VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
454                         VK_IMAGE_LAYOUT_UNDEFINED
455                 };
456
457                 const VkComponentMapping                componentMapping        =
458                 {
459                         VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
460                         VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
461                 };
462
463                 const VkImageSubresourceRange   subresourceRange        =
464                 {
465                         VK_IMAGE_ASPECT_COLOR_BIT,      //VkImageAspectFlags    aspectMask
466                         0u,                                                     //deUint32                              baseMipLevel
467                         1u,                                                     //deUint32                              levelCount
468                         0u,                                                     //deUint32                              baseArrayLayer
469                         1u                                                      //deUint32                              layerCount
470                 };
471
472                 const VkSamplerCreateInfo               samplerCreateInfo       =
473                 {
474                         VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
475                         DE_NULL,
476                         0u,
477                         VK_FILTER_NEAREST,
478                         VK_FILTER_NEAREST,
479                         VK_SAMPLER_MIPMAP_MODE_NEAREST,
480                         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
481                         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
482                         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
483                         0.0f,
484                         VK_FALSE,
485                         1.0f,
486                         DE_FALSE,
487                         VK_COMPARE_OP_ALWAYS,
488                         0.0f,
489                         0.0f,
490                         VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
491                         VK_FALSE,
492                 };
493
494                 m_image                 = createImage(vk, device, &imageCreateInfo);
495
496                 VkMemoryRequirements                    req                                     = getImageMemoryRequirements(vk, device, *m_image);
497
498                 req.size                *= 2;
499                 m_allocation    = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
500
501                 VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
502
503                 const VkImageViewCreateInfo             imageViewCreateInfo     =
504                 {
505                         VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
506                         VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
507                         subresourceRange
508                 };
509
510                 m_imageView             = createImageView(vk, device, &imageViewCreateInfo);
511                 m_sampler               = createSampler(vk, device, &samplerCreateInfo);
512
513                 // Transition input image layouts
514                 {
515                         const Unique<VkCommandPool>             cmdPool                 (makeCommandPool(vk, device, queueFamilyIndex));
516                         const Unique<VkCommandBuffer>   cmdBuffer               (makeCommandBuffer(context, *cmdPool));
517
518                         beginCommandBuffer(vk, *cmdBuffer);
519
520                         const VkImageMemoryBarrier              imageBarrier    = makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
521                                                                                                                                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
522
523                         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
524                                 (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);
525
526                         endCommandBuffer(vk, *cmdBuffer);
527                         submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
528                 }
529         }
530
531         VkImage getImage () const
532         {
533                 return *m_image;
534         }
535
536         VkImageView getImageView () const
537         {
538                 return *m_imageView;
539         }
540
541         VkSampler getSampler () const
542         {
543                 return *m_sampler;
544         }
545
546 private:
547         Move<VkImage> m_image;
548         Move<VkImageView> m_imageView;
549         Move<VkSampler> m_sampler;
550 };
551 }
552
553 std::string vkt::subgroups::getSharedMemoryBallotHelper()
554 {
555         return  "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
556                         "uvec4 sharedMemoryBallot(bool vote)\n"
557                         "{\n"
558                         "  uint groupOffset = gl_SubgroupID;\n"
559                         "  // One invocation in the group 0's the whole group's data\n"
560                         "  if (subgroupElect())\n"
561                         "  {\n"
562                         "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
563                         "  }\n"
564                         "  subgroupMemoryBarrierShared();\n"
565                         "  if (vote)\n"
566                         "  {\n"
567                         "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
568                         "    const highp uint bitToSet = 1u << invocationId;\n"
569                         "    switch (gl_SubgroupInvocationID / 32)\n"
570                         "    {\n"
571                         "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
572                         "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
573                         "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
574                         "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
575                         "    }\n"
576                         "  }\n"
577                         "  subgroupMemoryBarrierShared();\n"
578                         "  return superSecretComputeShaderHelper[groupOffset];\n"
579                         "}\n";
580 }
581
582 std::string vkt::subgroups::getSharedMemoryBallotHelperARB()
583 {
584         return  "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
585                         "uint64_t sharedMemoryBallot(bool vote)\n"
586                         "{\n"
587                         "  uint groupOffset = gl_SubgroupID;\n"
588                         "  // One invocation in the group 0's the whole group's data\n"
589                         "  if (subgroupElect())\n"
590                         "  {\n"
591                         "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
592                         "  }\n"
593                         "  subgroupMemoryBarrierShared();\n"
594                         "  if (vote)\n"
595                         "  {\n"
596                         "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
597                         "    const highp uint bitToSet = 1u << invocationId;\n"
598                         "    switch (gl_SubgroupInvocationID / 32)\n"
599                         "    {\n"
600                         "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
601                         "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
602                         "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
603                         "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
604                         "    }\n"
605                         "  }\n"
606                         "  subgroupMemoryBarrierShared();\n"
607                         "  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
608                         "}\n";
609 }
610
611 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
612 {
613         VkPhysicalDeviceSubgroupProperties subgroupProperties;
614         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
615         subgroupProperties.pNext = DE_NULL;
616
617         VkPhysicalDeviceProperties2 properties;
618         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
619         properties.pNext = &subgroupProperties;
620
621         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
622
623         return subgroupProperties.subgroupSize;
624 }
625
626 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
627         return 128u;
628 }
629
630 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
631 {
632         switch (stage)
633         {
634                 default:
635                         DE_FATAL("Unhandled stage!");
636                         return "";
637                 case VK_SHADER_STAGE_COMPUTE_BIT:
638                         return "compute";
639                 case VK_SHADER_STAGE_FRAGMENT_BIT:
640                         return "fragment";
641                 case VK_SHADER_STAGE_VERTEX_BIT:
642                         return "vertex";
643                 case VK_SHADER_STAGE_GEOMETRY_BIT:
644                         return "geometry";
645                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
646                         return "tess_control";
647                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
648                         return "tess_eval";
649         }
650 }
651
652 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
653 {
654         switch (bit)
655         {
656                 default:
657                         DE_FATAL("Unknown subgroup feature category!");
658                         return "";
659                 case VK_SUBGROUP_FEATURE_BASIC_BIT:
660                         return "VK_SUBGROUP_FEATURE_BASIC_BIT";
661                 case VK_SUBGROUP_FEATURE_VOTE_BIT:
662                         return "VK_SUBGROUP_FEATURE_VOTE_BIT";
663                 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
664                         return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
665                 case VK_SUBGROUP_FEATURE_BALLOT_BIT:
666                         return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
667                 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
668                         return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
669                 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
670                         return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
671                 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
672                         return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
673                 case VK_SUBGROUP_FEATURE_QUAD_BIT:
674                         return "VK_SUBGROUP_FEATURE_QUAD_BIT";
675         }
676 }
677
678 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
679 {
680         {
681         /*
682                 "#version 450\n"
683                 "void main (void)\n"
684                 "{\n"
685                 "  float pixelSize = 2.0f/1024.0f;\n"
686                 "   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
687                 "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
688                 "  gl_PointSize = 1.0f;\n"
689                 "}\n"
690         */
691                 const std::string vertNoSubgroup =
692                         "; SPIR-V\n"
693                         "; Version: 1.3\n"
694                         "; Generator: Khronos Glslang Reference Front End; 1\n"
695                         "; Bound: 37\n"
696                         "; Schema: 0\n"
697                         "OpCapability Shader\n"
698                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
699                         "OpMemoryModel Logical GLSL450\n"
700                         "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
701                         "OpMemberDecorate %20 0 BuiltIn Position\n"
702                         "OpMemberDecorate %20 1 BuiltIn PointSize\n"
703                         "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
704                         "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
705                         "OpDecorate %20 Block\n"
706                         "OpDecorate %26 BuiltIn VertexIndex\n"
707                         "%2 = OpTypeVoid\n"
708                         "%3 = OpTypeFunction %2\n"
709                         "%6 = OpTypeFloat 32\n"
710                         "%7 = OpTypePointer Function %6\n"
711                         "%9 = OpConstant %6 0.00195313\n"
712                         "%12 = OpConstant %6 2\n"
713                         "%14 = OpConstant %6 1\n"
714                         "%16 = OpTypeVector %6 4\n"
715                         "%17 = OpTypeInt 32 0\n"
716                         "%18 = OpConstant %17 1\n"
717                         "%19 = OpTypeArray %6 %18\n"
718                         "%20 = OpTypeStruct %16 %6 %19 %19\n"
719                         "%21 = OpTypePointer Output %20\n"
720                         "%22 = OpVariable %21 Output\n"
721                         "%23 = OpTypeInt 32 1\n"
722                         "%24 = OpConstant %23 0\n"
723                         "%25 = OpTypePointer Input %23\n"
724                         "%26 = OpVariable %25 Input\n"
725                         "%33 = OpConstant %6 0\n"
726                         "%35 = OpTypePointer Output %16\n"
727                         "%37 = OpConstant %23 1\n"
728                         "%38 = OpTypePointer Output %6\n"
729                         "%4 = OpFunction %2 None %3\n"
730                         "%5 = OpLabel\n"
731                         "%8 = OpVariable %7 Function\n"
732                         "%10 = OpVariable %7 Function\n"
733                         "OpStore %8 %9\n"
734                         "%11 = OpLoad %6 %8\n"
735                         "%13 = OpFDiv %6 %11 %12\n"
736                         "%15 = OpFSub %6 %13 %14\n"
737                         "OpStore %10 %15\n"
738                         "%27 = OpLoad %23 %26\n"
739                         "%28 = OpConvertSToF %6 %27\n"
740                         "%29 = OpLoad %6 %8\n"
741                         "%30 = OpFMul %6 %28 %29\n"
742                         "%31 = OpLoad %6 %10\n"
743                         "%32 = OpFAdd %6 %30 %31\n"
744                         "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
745                         "%36 = OpAccessChain %35 %22 %24\n"
746                         "OpStore %36 %34\n"
747                         "%39 = OpAccessChain %38 %22 %37\n"
748                         "OpStore %39 %14\n"
749                         "OpReturn\n"
750                         "OpFunctionEnd\n";
751                 programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
752         }
753
754         {
755         /*
756                 "#version 450\n"
757                 "layout(vertices=1) out;\n"
758                 "\n"
759                 "void main (void)\n"
760                 "{\n"
761                 "  if (gl_InvocationID == 0)\n"
762                 "  {\n"
763                 "    gl_TessLevelOuter[0] = 1.0f;\n"
764                 "    gl_TessLevelOuter[1] = 1.0f;\n"
765                 "  }\n"
766                 "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
767                 "}\n"
768         */
769                 const std::string tescNoSubgroup =
770                         "; SPIR-V\n"
771                         "; Version: 1.3\n"
772                         "; Generator: Khronos Glslang Reference Front End; 1\n"
773                         "; Bound: 45\n"
774                         "; Schema: 0\n"
775                         "OpCapability Tessellation\n"
776                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
777                         "OpMemoryModel Logical GLSL450\n"
778                         "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
779                         "OpExecutionMode %4 OutputVertices 1\n"
780                         "OpDecorate %8 BuiltIn InvocationId\n"
781                         "OpDecorate %20 Patch\n"
782                         "OpDecorate %20 BuiltIn TessLevelOuter\n"
783                         "OpMemberDecorate %29 0 BuiltIn Position\n"
784                         "OpMemberDecorate %29 1 BuiltIn PointSize\n"
785                         "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
786                         "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
787                         "OpDecorate %29 Block\n"
788                         "OpMemberDecorate %34 0 BuiltIn Position\n"
789                         "OpMemberDecorate %34 1 BuiltIn PointSize\n"
790                         "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
791                         "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
792                         "OpDecorate %34 Block\n"
793                         "%2 = OpTypeVoid\n"
794                         "%3 = OpTypeFunction %2\n"
795                         "%6 = OpTypeInt 32 1\n"
796                         "%7 = OpTypePointer Input %6\n"
797                         "%8 = OpVariable %7 Input\n"
798                         "%10 = OpConstant %6 0\n"
799                         "%11 = OpTypeBool\n"
800                         "%15 = OpTypeFloat 32\n"
801                         "%16 = OpTypeInt 32 0\n"
802                         "%17 = OpConstant %16 4\n"
803                         "%18 = OpTypeArray %15 %17\n"
804                         "%19 = OpTypePointer Output %18\n"
805                         "%20 = OpVariable %19 Output\n"
806                         "%21 = OpConstant %15 1\n"
807                         "%22 = OpTypePointer Output %15\n"
808                         "%24 = OpConstant %6 1\n"
809                         "%26 = OpTypeVector %15 4\n"
810                         "%27 = OpConstant %16 1\n"
811                         "%28 = OpTypeArray %15 %27\n"
812                         "%29 = OpTypeStruct %26 %15 %28 %28\n"
813                         "%30 = OpTypeArray %29 %27\n"
814                         "%31 = OpTypePointer Output %30\n"
815                         "%32 = OpVariable %31 Output\n"
816                         "%34 = OpTypeStruct %26 %15 %28 %28\n"
817                         "%35 = OpConstant %16 32\n"
818                         "%36 = OpTypeArray %34 %35\n"
819                         "%37 = OpTypePointer Input %36\n"
820                         "%38 = OpVariable %37 Input\n"
821                         "%40 = OpTypePointer Input %26\n"
822                         "%43 = OpTypePointer Output %26\n"
823                         "%4 = OpFunction %2 None %3\n"
824                         "%5 = OpLabel\n"
825                         "%9 = OpLoad %6 %8\n"
826                         "%12 = OpIEqual %11 %9 %10\n"
827                         "OpSelectionMerge %14 None\n"
828                         "OpBranchConditional %12 %13 %14\n"
829                         "%13 = OpLabel\n"
830                         "%23 = OpAccessChain %22 %20 %10\n"
831                         "OpStore %23 %21\n"
832                         "%25 = OpAccessChain %22 %20 %24\n"
833                         "OpStore %25 %21\n"
834                         "OpBranch %14\n"
835                         "%14 = OpLabel\n"
836                         "%33 = OpLoad %6 %8\n"
837                         "%39 = OpLoad %6 %8\n"
838                         "%41 = OpAccessChain %40 %38 %39 %10\n"
839                         "%42 = OpLoad %26 %41\n"
840                         "%44 = OpAccessChain %43 %32 %33 %10\n"
841                         "OpStore %44 %42\n"
842                         "OpReturn\n"
843                         "OpFunctionEnd\n";
844                 programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
845         }
846
847         {
848         /*
849                 "#version 450\n"
850                 "layout(isolines) in;\n"
851                 "\n"
852                 "void main (void)\n"
853                 "{\n"
854                 "  float pixelSize = 2.0f/1024.0f;\n"
855                 "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
856                 "}\n";
857         */
858                 const std::string teseNoSubgroup =
859                         "; SPIR-V\n"
860                         "; Version: 1.3\n"
861                         "; Generator: Khronos Glslang Reference Front End; 2\n"
862                         "; Bound: 42\n"
863                         "; Schema: 0\n"
864                         "OpCapability Tessellation\n"
865                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
866                         "OpMemoryModel Logical GLSL450\n"
867                         "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
868                         "OpExecutionMode %4 Isolines\n"
869                         "OpExecutionMode %4 SpacingEqual\n"
870                         "OpExecutionMode %4 VertexOrderCcw\n"
871                         "OpMemberDecorate %14 0 BuiltIn Position\n"
872                         "OpMemberDecorate %14 1 BuiltIn PointSize\n"
873                         "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
874                         "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
875                         "OpDecorate %14 Block\n"
876                         "OpMemberDecorate %19 0 BuiltIn Position\n"
877                         "OpMemberDecorate %19 1 BuiltIn PointSize\n"
878                         "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
879                         "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
880                         "OpDecorate %19 Block\n"
881                         "OpDecorate %29 BuiltIn TessCoord\n"
882                         "%2 = OpTypeVoid\n"
883                         "%3 = OpTypeFunction %2\n"
884                         "%6 = OpTypeFloat 32\n"
885                         "%7 = OpTypePointer Function %6\n"
886                         "%9 = OpConstant %6 0.00195313\n"
887                         "%10 = OpTypeVector %6 4\n"
888                         "%11 = OpTypeInt 32 0\n"
889                         "%12 = OpConstant %11 1\n"
890                         "%13 = OpTypeArray %6 %12\n"
891                         "%14 = OpTypeStruct %10 %6 %13 %13\n"
892                         "%15 = OpTypePointer Output %14\n"
893                         "%16 = OpVariable %15 Output\n"
894                         "%17 = OpTypeInt 32 1\n"
895                         "%18 = OpConstant %17 0\n"
896                         "%19 = OpTypeStruct %10 %6 %13 %13\n"
897                         "%20 = OpConstant %11 32\n"
898                         "%21 = OpTypeArray %19 %20\n"
899                         "%22 = OpTypePointer Input %21\n"
900                         "%23 = OpVariable %22 Input\n"
901                         "%24 = OpTypePointer Input %10\n"
902                         "%27 = OpTypeVector %6 3\n"
903                         "%28 = OpTypePointer Input %27\n"
904                         "%29 = OpVariable %28 Input\n"
905                         "%30 = OpConstant %11 0\n"
906                         "%31 = OpTypePointer Input %6\n"
907                         "%36 = OpConstant %6 2\n"
908                         "%40 = OpTypePointer Output %10\n"
909                         "%4 = OpFunction %2 None %3\n"
910                         "%5 = OpLabel\n"
911                         "%8 = OpVariable %7 Function\n"
912                         "OpStore %8 %9\n"
913                         "%25 = OpAccessChain %24 %23 %18 %18\n"
914                         "%26 = OpLoad %10 %25\n"
915                         "%32 = OpAccessChain %31 %29 %30\n"
916                         "%33 = OpLoad %6 %32\n"
917                         "%34 = OpLoad %6 %8\n"
918                         "%35 = OpFMul %6 %33 %34\n"
919                         "%37 = OpFDiv %6 %35 %36\n"
920                         "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
921                         "%39 = OpFAdd %10 %26 %38\n"
922                         "%41 = OpAccessChain %40 %16 %18\n"
923                         "OpStore %41 %39\n"
924                         "OpReturn\n"
925                         "OpFunctionEnd\n";
926                 programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
927         }
928
929 }
930
931
932 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
933 {
934         switch (stage)
935         {
936                 default:
937                         DE_FATAL("Unhandled stage!");
938                         return "";
939                 case VK_SHADER_STAGE_FRAGMENT_BIT:
940                         return
941                                 "#version 450\n"
942                                 "void main (void)\n"
943                                 "{\n"
944                                 "  float pixelSize = 2.0f/1024.0f;\n"
945                                 "   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
946                                 "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
947                                 "}\n";
948                 case VK_SHADER_STAGE_GEOMETRY_BIT:
949                         return
950                                 "#version 450\n"
951                                 "void main (void)\n"
952                                 "{\n"
953                                 "}\n";
954                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
955                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
956                         return
957                                 "#version 450\n"
958                                 "void main (void)\n"
959                                 "{\n"
960                                 "}\n";
961         }
962 }
963
964 bool vkt::subgroups::isSubgroupSupported(Context& context)
965 {
966         return context.contextSupports(vk::ApiVersion(1, 1, 0));
967 }
968
969 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
970         Context& context, const VkShaderStageFlags stage)
971 {
972         VkPhysicalDeviceSubgroupProperties subgroupProperties;
973         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
974         subgroupProperties.pNext = DE_NULL;
975
976         VkPhysicalDeviceProperties2 properties;
977         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
978         properties.pNext = &subgroupProperties;
979
980         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
981
982         return (stage & subgroupProperties.supportedStages) ? true : false;
983 }
984
985 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
986         VkShaderStageFlags stage)
987 {
988         switch (stage)
989         {
990                 default:
991                         return false;
992                 case VK_SHADER_STAGE_COMPUTE_BIT:
993                         return true;
994         }
995 }
996
997 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
998         Context& context,
999         VkSubgroupFeatureFlagBits bit) {
1000         VkPhysicalDeviceSubgroupProperties subgroupProperties;
1001         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1002         subgroupProperties.pNext = DE_NULL;
1003
1004         VkPhysicalDeviceProperties2 properties;
1005         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1006         properties.pNext = &subgroupProperties;
1007
1008         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1009
1010         return (bit & subgroupProperties.supportedOperations) ? true : false;
1011 }
1012
1013 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
1014 {
1015         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1016                                 context.getInstanceInterface(), context.getPhysicalDevice());
1017         return features.fragmentStoresAndAtomics ? true : false;
1018 }
1019
1020 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
1021 {
1022         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1023                                 context.getInstanceInterface(), context.getPhysicalDevice());
1024         return features.vertexPipelineStoresAndAtomics ? true : false;
1025 }
1026
1027 bool vkt::subgroups::isInt64SupportedForDevice(Context& context)
1028 {
1029         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1030                                 context.getInstanceInterface(), context.getPhysicalDevice());
1031         return features.shaderInt64 ? true : false;
1032 }
1033
1034 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1035 {
1036         const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1037                 context.getInstanceInterface(), context.getPhysicalDevice());
1038         return features.shaderTessellationAndGeometryPointSize ? true : false;
1039 }
1040
1041 bool vkt::subgroups::isFormatSupportedForDevice(Context& context, vk::VkFormat format)
1042 {
1043         VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures subgroupExtendedTypesFeatures;
1044         deMemset(&subgroupExtendedTypesFeatures, 0, sizeof(subgroupExtendedTypesFeatures));
1045         subgroupExtendedTypesFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES;
1046         subgroupExtendedTypesFeatures.pNext = DE_NULL;
1047
1048         VkPhysicalDeviceShaderFloat16Int8Features float16Int8Features;
1049         deMemset(&float16Int8Features, 0, sizeof(float16Int8Features));
1050         float16Int8Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
1051         float16Int8Features.pNext = DE_NULL;
1052
1053         VkPhysicalDeviceFeatures2 features2;
1054         deMemset(&features2, 0, sizeof(features2));
1055         features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1056         features2.pNext = DE_NULL;
1057
1058         if (isDeviceExtensionSupported(context.getUsedApiVersion(), context.getDeviceExtensions(), "VK_KHR_shader_subgroup_extended_types") &&
1059                 isDeviceExtensionSupported(context.getUsedApiVersion(), context.getDeviceExtensions(), "VK_KHR_shader_float16_int8"))
1060         {
1061                 features2.pNext = &subgroupExtendedTypesFeatures;
1062                 subgroupExtendedTypesFeatures.pNext = &float16Int8Features;
1063         }
1064
1065         const PlatformInterface&                platformInterface               = context.getPlatformInterface();
1066         const VkInstance                                instance                                = context.getInstance();
1067         const InstanceDriver                    instanceDriver                  (platformInterface, instance);
1068
1069         instanceDriver.getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
1070
1071         switch (format)
1072         {
1073                 default:
1074                         return true;
1075                 case VK_FORMAT_R16_SFLOAT:
1076                 case VK_FORMAT_R16G16_SFLOAT:
1077                 case VK_FORMAT_R16G16B16_SFLOAT:
1078                 case VK_FORMAT_R16G16B16A16_SFLOAT:
1079                         return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderFloat16 ? true : false;
1080                 case VK_FORMAT_R64_SFLOAT:
1081                 case VK_FORMAT_R64G64_SFLOAT:
1082                 case VK_FORMAT_R64G64B64_SFLOAT:
1083                 case VK_FORMAT_R64G64B64A64_SFLOAT:
1084                         return features2.features.shaderFloat64 ? true : false;
1085                 case VK_FORMAT_R8_SINT:
1086                 case VK_FORMAT_R8G8_SINT:
1087                 case VK_FORMAT_R8G8B8_SINT:
1088                 case VK_FORMAT_R8G8B8A8_SINT:
1089                 case VK_FORMAT_R8_UINT:
1090                 case VK_FORMAT_R8G8_UINT:
1091                 case VK_FORMAT_R8G8B8_UINT:
1092                 case VK_FORMAT_R8G8B8A8_UINT:
1093                         return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderInt8 ? true : false;
1094                 case VK_FORMAT_R16_SINT:
1095                 case VK_FORMAT_R16G16_SINT:
1096                 case VK_FORMAT_R16G16B16_SINT:
1097                 case VK_FORMAT_R16G16B16A16_SINT:
1098                 case VK_FORMAT_R16_UINT:
1099                 case VK_FORMAT_R16G16_UINT:
1100                 case VK_FORMAT_R16G16B16_UINT:
1101                 case VK_FORMAT_R16G16B16A16_UINT:
1102                         return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt16 ? true : false;
1103                 case VK_FORMAT_R64_SINT:
1104                 case VK_FORMAT_R64G64_SINT:
1105                 case VK_FORMAT_R64G64B64_SINT:
1106                 case VK_FORMAT_R64G64B64A64_SINT:
1107                 case VK_FORMAT_R64_UINT:
1108                 case VK_FORMAT_R64G64_UINT:
1109                 case VK_FORMAT_R64G64B64_UINT:
1110                 case VK_FORMAT_R64G64B64A64_UINT:
1111                         return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt64 ? true : false;
1112         }
1113 }
1114
1115 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1116 {
1117         return context.contextSupports(vk::ApiVersion(1, 2, 0)) &&
1118                 vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice()).subgroupBroadcastDynamicId;
1119 }
1120
1121 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1122 {
1123         switch (format)
1124         {
1125                 default:
1126                         DE_FATAL("Unhandled format!");
1127                         return "";
1128                 case VK_FORMAT_R8_SINT:
1129                         return "int8_t";
1130                 case VK_FORMAT_R8G8_SINT:
1131                         return "i8vec2";
1132                 case VK_FORMAT_R8G8B8_SINT:
1133                         return "i8vec3";
1134                 case VK_FORMAT_R8G8B8A8_SINT:
1135                         return "i8vec4";
1136                 case VK_FORMAT_R8_UINT:
1137                         return "uint8_t";
1138                 case VK_FORMAT_R8G8_UINT:
1139                         return "u8vec2";
1140                 case VK_FORMAT_R8G8B8_UINT:
1141                         return "u8vec3";
1142                 case VK_FORMAT_R8G8B8A8_UINT:
1143                         return "u8vec4";
1144                 case VK_FORMAT_R16_SINT:
1145                         return "int16_t";
1146                 case VK_FORMAT_R16G16_SINT:
1147                         return "i16vec2";
1148                 case VK_FORMAT_R16G16B16_SINT:
1149                         return "i16vec3";
1150                 case VK_FORMAT_R16G16B16A16_SINT:
1151                         return "i16vec4";
1152                 case VK_FORMAT_R16_UINT:
1153                         return "uint16_t";
1154                 case VK_FORMAT_R16G16_UINT:
1155                         return "u16vec2";
1156                 case VK_FORMAT_R16G16B16_UINT:
1157                         return "u16vec3";
1158                 case VK_FORMAT_R16G16B16A16_UINT:
1159                         return "u16vec4";
1160                 case VK_FORMAT_R32_SINT:
1161                         return "int";
1162                 case VK_FORMAT_R32G32_SINT:
1163                         return "ivec2";
1164                 case VK_FORMAT_R32G32B32_SINT:
1165                         return "ivec3";
1166                 case VK_FORMAT_R32G32B32A32_SINT:
1167                         return "ivec4";
1168                 case VK_FORMAT_R32_UINT:
1169                         return "uint";
1170                 case VK_FORMAT_R32G32_UINT:
1171                         return "uvec2";
1172                 case VK_FORMAT_R32G32B32_UINT:
1173                         return "uvec3";
1174                 case VK_FORMAT_R32G32B32A32_UINT:
1175                         return "uvec4";
1176                 case VK_FORMAT_R64_SINT:
1177                         return "int64_t";
1178                 case VK_FORMAT_R64G64_SINT:
1179                         return "i64vec2";
1180                 case VK_FORMAT_R64G64B64_SINT:
1181                         return "i64vec3";
1182                 case VK_FORMAT_R64G64B64A64_SINT:
1183                         return "i64vec4";
1184                 case VK_FORMAT_R64_UINT:
1185                         return "uint64_t";
1186                 case VK_FORMAT_R64G64_UINT:
1187                         return "u64vec2";
1188                 case VK_FORMAT_R64G64B64_UINT:
1189                         return "u64vec3";
1190                 case VK_FORMAT_R64G64B64A64_UINT:
1191                         return "u64vec4";
1192                 case VK_FORMAT_R16_SFLOAT:
1193                         return "float16_t";
1194                 case VK_FORMAT_R16G16_SFLOAT:
1195                         return "f16vec2";
1196                 case VK_FORMAT_R16G16B16_SFLOAT:
1197                         return "f16vec3";
1198                 case VK_FORMAT_R16G16B16A16_SFLOAT:
1199                         return "f16vec4";
1200                 case VK_FORMAT_R32_SFLOAT:
1201                         return "float";
1202                 case VK_FORMAT_R32G32_SFLOAT:
1203                         return "vec2";
1204                 case VK_FORMAT_R32G32B32_SFLOAT:
1205                         return "vec3";
1206                 case VK_FORMAT_R32G32B32A32_SFLOAT:
1207                         return "vec4";
1208                 case VK_FORMAT_R64_SFLOAT:
1209                         return "double";
1210                 case VK_FORMAT_R64G64_SFLOAT:
1211                         return "dvec2";
1212                 case VK_FORMAT_R64G64B64_SFLOAT:
1213                         return "dvec3";
1214                 case VK_FORMAT_R64G64B64A64_SFLOAT:
1215                         return "dvec4";
1216                 case VK_FORMAT_R8_USCALED:
1217                         return "bool";
1218                 case VK_FORMAT_R8G8_USCALED:
1219                         return "bvec2";
1220                 case VK_FORMAT_R8G8B8_USCALED:
1221                         return "bvec3";
1222                 case VK_FORMAT_R8G8B8A8_USCALED:
1223                         return "bvec4";
1224         }
1225 }
1226
1227 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1228 {
1229         switch (format)
1230         {
1231                 default:
1232                         return "";
1233                 case VK_FORMAT_R8_SINT:
1234                 case VK_FORMAT_R8G8_SINT:
1235                 case VK_FORMAT_R8G8B8_SINT:
1236                 case VK_FORMAT_R8G8B8A8_SINT:
1237                 case VK_FORMAT_R8_UINT:
1238                 case VK_FORMAT_R8G8_UINT:
1239                 case VK_FORMAT_R8G8B8_UINT:
1240                 case VK_FORMAT_R8G8B8A8_UINT:
1241                         return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1242                 case VK_FORMAT_R16_SINT:
1243                 case VK_FORMAT_R16G16_SINT:
1244                 case VK_FORMAT_R16G16B16_SINT:
1245                 case VK_FORMAT_R16G16B16A16_SINT:
1246                 case VK_FORMAT_R16_UINT:
1247                 case VK_FORMAT_R16G16_UINT:
1248                 case VK_FORMAT_R16G16B16_UINT:
1249                 case VK_FORMAT_R16G16B16A16_UINT:
1250                         return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1251                 case VK_FORMAT_R64_SINT:
1252                 case VK_FORMAT_R64G64_SINT:
1253                 case VK_FORMAT_R64G64B64_SINT:
1254                 case VK_FORMAT_R64G64B64A64_SINT:
1255                 case VK_FORMAT_R64_UINT:
1256                 case VK_FORMAT_R64G64_UINT:
1257                 case VK_FORMAT_R64G64B64_UINT:
1258                 case VK_FORMAT_R64G64B64A64_UINT:
1259                         return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1260                 case VK_FORMAT_R16_SFLOAT:
1261                 case VK_FORMAT_R16G16_SFLOAT:
1262                 case VK_FORMAT_R16G16B16_SFLOAT:
1263                 case VK_FORMAT_R16G16B16A16_SFLOAT:
1264                         return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1265         }
1266 }
1267
1268 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats()
1269 {
1270         std::vector<VkFormat> formats;
1271
1272         formats.push_back(VK_FORMAT_R8_SINT);
1273         formats.push_back(VK_FORMAT_R8G8_SINT);
1274         formats.push_back(VK_FORMAT_R8G8B8_SINT);
1275         formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1276         formats.push_back(VK_FORMAT_R8_UINT);
1277         formats.push_back(VK_FORMAT_R8G8_UINT);
1278         formats.push_back(VK_FORMAT_R8G8B8_UINT);
1279         formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1280         formats.push_back(VK_FORMAT_R16_SINT);
1281         formats.push_back(VK_FORMAT_R16G16_SINT);
1282         formats.push_back(VK_FORMAT_R16G16B16_SINT);
1283         formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1284         formats.push_back(VK_FORMAT_R16_UINT);
1285         formats.push_back(VK_FORMAT_R16G16_UINT);
1286         formats.push_back(VK_FORMAT_R16G16B16_UINT);
1287         formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1288         formats.push_back(VK_FORMAT_R32_SINT);
1289         formats.push_back(VK_FORMAT_R32G32_SINT);
1290         formats.push_back(VK_FORMAT_R32G32B32_SINT);
1291         formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
1292         formats.push_back(VK_FORMAT_R32_UINT);
1293         formats.push_back(VK_FORMAT_R32G32_UINT);
1294         formats.push_back(VK_FORMAT_R32G32B32_UINT);
1295         formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
1296         formats.push_back(VK_FORMAT_R64_SINT);
1297         formats.push_back(VK_FORMAT_R64G64_SINT);
1298         formats.push_back(VK_FORMAT_R64G64B64_SINT);
1299         formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
1300         formats.push_back(VK_FORMAT_R64_UINT);
1301         formats.push_back(VK_FORMAT_R64G64_UINT);
1302         formats.push_back(VK_FORMAT_R64G64B64_UINT);
1303         formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
1304         formats.push_back(VK_FORMAT_R16_SFLOAT);
1305         formats.push_back(VK_FORMAT_R16G16_SFLOAT);
1306         formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
1307         formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
1308         formats.push_back(VK_FORMAT_R32_SFLOAT);
1309         formats.push_back(VK_FORMAT_R32G32_SFLOAT);
1310         formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
1311         formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
1312         formats.push_back(VK_FORMAT_R64_SFLOAT);
1313         formats.push_back(VK_FORMAT_R64G64_SFLOAT);
1314         formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
1315         formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
1316         formats.push_back(VK_FORMAT_R8_USCALED);
1317         formats.push_back(VK_FORMAT_R8G8_USCALED);
1318         formats.push_back(VK_FORMAT_R8G8B8_USCALED);
1319         formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
1320
1321         return formats;
1322 }
1323
1324 bool vkt::subgroups::isFormatSigned (VkFormat format)
1325 {
1326         switch (format)
1327         {
1328                 default:
1329                         return false;
1330                 case VK_FORMAT_R8_SINT:
1331                 case VK_FORMAT_R8G8_SINT:
1332                 case VK_FORMAT_R8G8B8_SINT:
1333                 case VK_FORMAT_R8G8B8A8_SINT:
1334                 case VK_FORMAT_R16_SINT:
1335                 case VK_FORMAT_R16G16_SINT:
1336                 case VK_FORMAT_R16G16B16_SINT:
1337                 case VK_FORMAT_R16G16B16A16_SINT:
1338                 case VK_FORMAT_R32_SINT:
1339                 case VK_FORMAT_R32G32_SINT:
1340                 case VK_FORMAT_R32G32B32_SINT:
1341                 case VK_FORMAT_R32G32B32A32_SINT:
1342                 case VK_FORMAT_R64_SINT:
1343                 case VK_FORMAT_R64G64_SINT:
1344                 case VK_FORMAT_R64G64B64_SINT:
1345                 case VK_FORMAT_R64G64B64A64_SINT:
1346                         return true;
1347         }
1348 }
1349
1350 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
1351 {
1352         switch (format)
1353         {
1354                 default:
1355                         return false;
1356                 case VK_FORMAT_R8_UINT:
1357                 case VK_FORMAT_R8G8_UINT:
1358                 case VK_FORMAT_R8G8B8_UINT:
1359                 case VK_FORMAT_R8G8B8A8_UINT:
1360                 case VK_FORMAT_R16_UINT:
1361                 case VK_FORMAT_R16G16_UINT:
1362                 case VK_FORMAT_R16G16B16_UINT:
1363                 case VK_FORMAT_R16G16B16A16_UINT:
1364                 case VK_FORMAT_R32_UINT:
1365                 case VK_FORMAT_R32G32_UINT:
1366                 case VK_FORMAT_R32G32B32_UINT:
1367                 case VK_FORMAT_R32G32B32A32_UINT:
1368                 case VK_FORMAT_R64_UINT:
1369                 case VK_FORMAT_R64G64_UINT:
1370                 case VK_FORMAT_R64G64B64_UINT:
1371                 case VK_FORMAT_R64G64B64A64_UINT:
1372                         return true;
1373         }
1374 }
1375
1376 bool vkt::subgroups::isFormatFloat (VkFormat format)
1377 {
1378         switch (format)
1379         {
1380                 default:
1381                         return false;
1382                 case VK_FORMAT_R16_SFLOAT:
1383                 case VK_FORMAT_R16G16_SFLOAT:
1384                 case VK_FORMAT_R16G16B16_SFLOAT:
1385                 case VK_FORMAT_R16G16B16A16_SFLOAT:
1386                 case VK_FORMAT_R32_SFLOAT:
1387                 case VK_FORMAT_R32G32_SFLOAT:
1388                 case VK_FORMAT_R32G32B32_SFLOAT:
1389                 case VK_FORMAT_R32G32B32A32_SFLOAT:
1390                 case VK_FORMAT_R64_SFLOAT:
1391                 case VK_FORMAT_R64G64_SFLOAT:
1392                 case VK_FORMAT_R64G64B64_SFLOAT:
1393                 case VK_FORMAT_R64G64B64A64_SFLOAT:
1394                         return true;
1395         }
1396 }
1397
1398 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
1399 {
1400         /*
1401                 "layout(location = 0) in highp vec4 in_position;\n"
1402                 "void main (void)\n"
1403                 "{\n"
1404                 "  gl_Position = in_position;\n"
1405                 "  gl_PointSize = 1.0f;\n"
1406                 "}\n";
1407         */
1408         programCollection.spirvAsmSources.add("vert") <<
1409                 "; SPIR-V\n"
1410                 "; Version: 1.3\n"
1411                 "; Generator: Khronos Glslang Reference Front End; 7\n"
1412                 "; Bound: 25\n"
1413                 "; Schema: 0\n"
1414                 "OpCapability Shader\n"
1415                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1416                 "OpMemoryModel Logical GLSL450\n"
1417                 "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
1418                 "OpMemberDecorate %11 0 BuiltIn Position\n"
1419                 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1420                 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1421                 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1422                 "OpDecorate %11 Block\n"
1423                 "OpDecorate %17 Location 0\n"
1424                 "%2 = OpTypeVoid\n"
1425                 "%3 = OpTypeFunction %2\n"
1426                 "%6 = OpTypeFloat 32\n"
1427                 "%7 = OpTypeVector %6 4\n"
1428                 "%8 = OpTypeInt 32 0\n"
1429                 "%9 = OpConstant %8 1\n"
1430                 "%10 = OpTypeArray %6 %9\n"
1431                 "%11 = OpTypeStruct %7 %6 %10 %10\n"
1432                 "%12 = OpTypePointer Output %11\n"
1433                 "%13 = OpVariable %12 Output\n"
1434                 "%14 = OpTypeInt 32 1\n"
1435                 "%15 = OpConstant %14 0\n"
1436                 "%16 = OpTypePointer Input %7\n"
1437                 "%17 = OpVariable %16 Input\n"
1438                 "%19 = OpTypePointer Output %7\n"
1439                 "%21 = OpConstant %14 1\n"
1440                 "%22 = OpConstant %6 1\n"
1441                 "%23 = OpTypePointer Output %6\n"
1442                 "%4 = OpFunction %2 None %3\n"
1443                 "%5 = OpLabel\n"
1444                 "%18 = OpLoad %7 %17\n"
1445                 "%20 = OpAccessChain %19 %13 %15\n"
1446                 "OpStore %20 %18\n"
1447                 "%24 = OpAccessChain %23 %13 %21\n"
1448                 "OpStore %24 %22\n"
1449                 "OpReturn\n"
1450                 "OpFunctionEnd\n";
1451 }
1452
1453 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
1454 {
1455         /*
1456                 "layout(location = 0) in float in_color;\n"
1457                 "layout(location = 0) out uint out_color;\n"
1458                 "void main()\n"
1459                 {\n"
1460                 "       out_color = uint(in_color);\n"
1461                 "}\n";
1462         */
1463         programCollection.spirvAsmSources.add("fragment") <<
1464                 "; SPIR-V\n"
1465                 "; Version: 1.3\n"
1466                 "; Generator: Khronos Glslang Reference Front End; 2\n"
1467                 "; Bound: 14\n"
1468                 "; Schema: 0\n"
1469                 "OpCapability Shader\n"
1470                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1471                 "OpMemoryModel Logical GLSL450\n"
1472                 "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
1473                 "OpExecutionMode %4 OriginUpperLeft\n"
1474                 "OpDecorate %8 Location 0\n"
1475                 "OpDecorate %11 Location 0\n"
1476                 "%2 = OpTypeVoid\n"
1477                 "%3 = OpTypeFunction %2\n"
1478                 "%6 = OpTypeInt 32 0\n"
1479                 "%7 = OpTypePointer Output %6\n"
1480                 "%8 = OpVariable %7 Output\n"
1481                 "%9 = OpTypeFloat 32\n"
1482                 "%10 = OpTypePointer Input %9\n"
1483                 "%11 = OpVariable %10 Input\n"
1484                 "%4 = OpFunction %2 None %3\n"
1485                 "%5 = OpLabel\n"
1486                 "%12 = OpLoad %9 %11\n"
1487                 "%13 = OpConvertFToU %6 %12\n"
1488                 "OpStore %8 %13\n"
1489                 "OpReturn\n"
1490                 "OpFunctionEnd\n";
1491 }
1492
1493 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
1494 {
1495         /*
1496                 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1497                 "#extension GL_EXT_tessellation_shader : require\n"
1498                 "layout(vertices = 2) out;\n"
1499                 "void main (void)\n"
1500                 "{\n"
1501                 "  if (gl_InvocationID == 0)\n"
1502                   {\n"
1503                 "    gl_TessLevelOuter[0] = 1.0f;\n"
1504                 "    gl_TessLevelOuter[1] = 1.0f;\n"
1505                 "  }\n"
1506                 "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1507                 "}\n";
1508         */
1509         programCollection.spirvAsmSources.add("tesc") <<
1510                 "; SPIR-V\n"
1511                 "; Version: 1.3\n"
1512                 "; Generator: Khronos Glslang Reference Front End; 2\n"
1513                 "; Bound: 46\n"
1514                 "; Schema: 0\n"
1515                 "OpCapability Tessellation\n"
1516                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1517                 "OpMemoryModel Logical GLSL450\n"
1518                 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
1519                 "OpExecutionMode %4 OutputVertices 2\n"
1520                 "OpDecorate %8 BuiltIn InvocationId\n"
1521                 "OpDecorate %20 Patch\n"
1522                 "OpDecorate %20 BuiltIn TessLevelOuter\n"
1523                 "OpMemberDecorate %29 0 BuiltIn Position\n"
1524                 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1525                 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1526                 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1527                 "OpDecorate %29 Block\n"
1528                 "OpMemberDecorate %35 0 BuiltIn Position\n"
1529                 "OpMemberDecorate %35 1 BuiltIn PointSize\n"
1530                 "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
1531                 "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
1532                 "OpDecorate %35 Block\n"
1533                 "%2 = OpTypeVoid\n"
1534                 "%3 = OpTypeFunction %2\n"
1535                 "%6 = OpTypeInt 32 1\n"
1536                 "%7 = OpTypePointer Input %6\n"
1537                 "%8 = OpVariable %7 Input\n"
1538                 "%10 = OpConstant %6 0\n"
1539                 "%11 = OpTypeBool\n"
1540                 "%15 = OpTypeFloat 32\n"
1541                 "%16 = OpTypeInt 32 0\n"
1542                 "%17 = OpConstant %16 4\n"
1543                 "%18 = OpTypeArray %15 %17\n"
1544                 "%19 = OpTypePointer Output %18\n"
1545                 "%20 = OpVariable %19 Output\n"
1546                 "%21 = OpConstant %15 1\n"
1547                 "%22 = OpTypePointer Output %15\n"
1548                 "%24 = OpConstant %6 1\n"
1549                 "%26 = OpTypeVector %15 4\n"
1550                 "%27 = OpConstant %16 1\n"
1551                 "%28 = OpTypeArray %15 %27\n"
1552                 "%29 = OpTypeStruct %26 %15 %28 %28\n"
1553                 "%30 = OpConstant %16 2\n"
1554                 "%31 = OpTypeArray %29 %30\n"
1555                 "%32 = OpTypePointer Output %31\n"
1556                 "%33 = OpVariable %32 Output\n"
1557                 "%35 = OpTypeStruct %26 %15 %28 %28\n"
1558                 "%36 = OpConstant %16 32\n"
1559                 "%37 = OpTypeArray %35 %36\n"
1560                 "%38 = OpTypePointer Input %37\n"
1561                 "%39 = OpVariable %38 Input\n"
1562                 "%41 = OpTypePointer Input %26\n"
1563                 "%44 = OpTypePointer Output %26\n"
1564                 "%4 = OpFunction %2 None %3\n"
1565                 "%5 = OpLabel\n"
1566                 "%9 = OpLoad %6 %8\n"
1567                 "%12 = OpIEqual %11 %9 %10\n"
1568                 "OpSelectionMerge %14 None\n"
1569                 "OpBranchConditional %12 %13 %14\n"
1570                 "%13 = OpLabel\n"
1571                 "%23 = OpAccessChain %22 %20 %10\n"
1572                 "OpStore %23 %21\n"
1573                 "%25 = OpAccessChain %22 %20 %24\n"
1574                 "OpStore %25 %21\n"
1575                 "OpBranch %14\n"
1576                 "%14 = OpLabel\n"
1577                 "%34 = OpLoad %6 %8\n"
1578                 "%40 = OpLoad %6 %8\n"
1579                 "%42 = OpAccessChain %41 %39 %40 %10\n"
1580                 "%43 = OpLoad %26 %42\n"
1581                 "%45 = OpAccessChain %44 %33 %34 %10\n"
1582                 "OpStore %45 %43\n"
1583                 "OpReturn\n"
1584                 "OpFunctionEnd\n";
1585 }
1586
1587 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
1588 {
1589         /*
1590                 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1591                 "#extension GL_EXT_tessellation_shader : require\n"
1592                 "layout(isolines, equal_spacing, ccw ) in;\n"
1593                 "layout(location = 0) in float in_color[];\n"
1594                 "layout(location = 0) out float out_color;\n"
1595                 "\n"
1596                 "void main (void)\n"
1597                 "{\n"
1598                 "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1599                 "  out_color = in_color[0];\n"
1600                 "}\n";
1601         */
1602         programCollection.spirvAsmSources.add("tese") <<
1603                 "; SPIR-V\n"
1604                 "; Version: 1.3\n"
1605                 "; Generator: Khronos Glslang Reference Front End; 2\n"
1606                 "; Bound: 45\n"
1607                 "; Schema: 0\n"
1608                 "OpCapability Tessellation\n"
1609                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1610                 "OpMemoryModel Logical GLSL450\n"
1611                 "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
1612                 "OpExecutionMode %4 Isolines\n"
1613                 "OpExecutionMode %4 SpacingEqual\n"
1614                 "OpExecutionMode %4 VertexOrderCcw\n"
1615                 "OpMemberDecorate %11 0 BuiltIn Position\n"
1616                 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1617                 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1618                 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1619                 "OpDecorate %11 Block\n"
1620                 "OpMemberDecorate %16 0 BuiltIn Position\n"
1621                 "OpMemberDecorate %16 1 BuiltIn PointSize\n"
1622                 "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
1623                 "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
1624                 "OpDecorate %16 Block\n"
1625                 "OpDecorate %29 BuiltIn TessCoord\n"
1626                 "OpDecorate %39 Location 0\n"
1627                 "OpDecorate %42 Location 0\n"
1628                 "%2 = OpTypeVoid\n"
1629                 "%3 = OpTypeFunction %2\n"
1630                 "%6 = OpTypeFloat 32\n"
1631                 "%7 = OpTypeVector %6 4\n"
1632                 "%8 = OpTypeInt 32 0\n"
1633                 "%9 = OpConstant %8 1\n"
1634                 "%10 = OpTypeArray %6 %9\n"
1635                 "%11 = OpTypeStruct %7 %6 %10 %10\n"
1636                 "%12 = OpTypePointer Output %11\n"
1637                 "%13 = OpVariable %12 Output\n"
1638                 "%14 = OpTypeInt 32 1\n"
1639                 "%15 = OpConstant %14 0\n"
1640                 "%16 = OpTypeStruct %7 %6 %10 %10\n"
1641                 "%17 = OpConstant %8 32\n"
1642                 "%18 = OpTypeArray %16 %17\n"
1643                 "%19 = OpTypePointer Input %18\n"
1644                 "%20 = OpVariable %19 Input\n"
1645                 "%21 = OpTypePointer Input %7\n"
1646                 "%24 = OpConstant %14 1\n"
1647                 "%27 = OpTypeVector %6 3\n"
1648                 "%28 = OpTypePointer Input %27\n"
1649                 "%29 = OpVariable %28 Input\n"
1650                 "%30 = OpConstant %8 0\n"
1651                 "%31 = OpTypePointer Input %6\n"
1652                 "%36 = OpTypePointer Output %7\n"
1653                 "%38 = OpTypePointer Output %6\n"
1654                 "%39 = OpVariable %38 Output\n"
1655                 "%40 = OpTypeArray %6 %17\n"
1656                 "%41 = OpTypePointer Input %40\n"
1657                 "%42 = OpVariable %41 Input\n"
1658                 "%4 = OpFunction %2 None %3\n"
1659                 "%5 = OpLabel\n"
1660                 "%22 = OpAccessChain %21 %20 %15 %15\n"
1661                 "%23 = OpLoad %7 %22\n"
1662                 "%25 = OpAccessChain %21 %20 %24 %15\n"
1663                 "%26 = OpLoad %7 %25\n"
1664                 "%32 = OpAccessChain %31 %29 %30\n"
1665                 "%33 = OpLoad %6 %32\n"
1666                 "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
1667                 "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
1668                 "%37 = OpAccessChain %36 %13 %15\n"
1669                 "OpStore %37 %35\n"
1670                 "%43 = OpAccessChain %31 %42 %15\n"
1671                 "%44 = OpLoad %6 %43\n"
1672                 "OpStore %39 %44\n"
1673                 "OpReturn\n"
1674                 "OpFunctionEnd\n";
1675 }
1676
1677 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options,  vk::GlslSourceCollection& collection)
1678 {
1679         tcu::StringTemplate geometryTemplate(glslTemplate);
1680
1681         map<string, string>             linesParams;
1682         linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
1683
1684         map<string, string>             pointsParams;
1685         pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
1686
1687         collection.add("geometry_lines")        << glu::GeometrySource(geometryTemplate.specialize(linesParams))        << options;
1688         collection.add("geometry_points")       << glu::GeometrySource(geometryTemplate.specialize(pointsParams))       << options;
1689 }
1690
1691 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
1692 {
1693         tcu::StringTemplate geometryTemplate(spirvTemplate);
1694
1695         map<string, string>             linesParams;
1696         linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
1697
1698         map<string, string>             pointsParams;
1699         pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
1700
1701         collection.add("geometry_lines")        << geometryTemplate.specialize(linesParams)             << options;
1702         collection.add("geometry_points")       << geometryTemplate.specialize(pointsParams)    << options;
1703 }
1704
1705 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
1706 {
1707         const vk::VkFormat format = data.format;
1708         const vk::VkDeviceSize size = data.numElements *
1709                 (data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
1710         if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
1711         {
1712                 de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
1713
1714                 switch (format)
1715                 {
1716                         default:
1717                                 DE_FATAL("Illegal buffer format");
1718                                 break;
1719                         case VK_FORMAT_R8_SINT:
1720                         case VK_FORMAT_R8G8_SINT:
1721                         case VK_FORMAT_R8G8B8_SINT:
1722                         case VK_FORMAT_R8G8B8A8_SINT:
1723                         case VK_FORMAT_R8_UINT:
1724                         case VK_FORMAT_R8G8_UINT:
1725                         case VK_FORMAT_R8G8B8_UINT:
1726                         case VK_FORMAT_R8G8B8A8_UINT:
1727                         {
1728                                 deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
1729
1730                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
1731                                 {
1732                                         ptr[k] = rnd.getUint8();
1733                                 }
1734                         }
1735                         break;
1736                         case VK_FORMAT_R16_SINT:
1737                         case VK_FORMAT_R16G16_SINT:
1738                         case VK_FORMAT_R16G16B16_SINT:
1739                         case VK_FORMAT_R16G16B16A16_SINT:
1740                         case VK_FORMAT_R16_UINT:
1741                         case VK_FORMAT_R16G16_UINT:
1742                         case VK_FORMAT_R16G16B16_UINT:
1743                         case VK_FORMAT_R16G16B16A16_UINT:
1744                         {
1745                                 deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
1746
1747                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
1748                                 {
1749                                         ptr[k] = rnd.getUint16();
1750                                 }
1751                         }
1752                         break;
1753                         case VK_FORMAT_R8_USCALED:
1754                         case VK_FORMAT_R8G8_USCALED:
1755                         case VK_FORMAT_R8G8B8_USCALED:
1756                         case VK_FORMAT_R8G8B8A8_USCALED:
1757                         {
1758                                 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
1759
1760                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
1761                                 {
1762                                         deUint32 r = rnd.getUint32();
1763                                         ptr[k] = (r & 1) ? r : 0;
1764                                 }
1765                         }
1766                         break;
1767                         case VK_FORMAT_R32_SINT:
1768                         case VK_FORMAT_R32G32_SINT:
1769                         case VK_FORMAT_R32G32B32_SINT:
1770                         case VK_FORMAT_R32G32B32A32_SINT:
1771                         case VK_FORMAT_R32_UINT:
1772                         case VK_FORMAT_R32G32_UINT:
1773                         case VK_FORMAT_R32G32B32_UINT:
1774                         case VK_FORMAT_R32G32B32A32_UINT:
1775                         {
1776                                 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
1777
1778                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
1779                                 {
1780                                         ptr[k] = rnd.getUint32();
1781                                 }
1782                         }
1783                         break;
1784                         case VK_FORMAT_R64_SINT:
1785                         case VK_FORMAT_R64G64_SINT:
1786                         case VK_FORMAT_R64G64B64_SINT:
1787                         case VK_FORMAT_R64G64B64A64_SINT:
1788                         case VK_FORMAT_R64_UINT:
1789                         case VK_FORMAT_R64G64_UINT:
1790                         case VK_FORMAT_R64G64B64_UINT:
1791                         case VK_FORMAT_R64G64B64A64_UINT:
1792                         {
1793                                 deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
1794
1795                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
1796                                 {
1797                                         ptr[k] = rnd.getUint64();
1798                                 }
1799                         }
1800                         break;
1801                         case VK_FORMAT_R16_SFLOAT:
1802                         case VK_FORMAT_R16G16_SFLOAT:
1803                         case VK_FORMAT_R16G16B16_SFLOAT:
1804                         case VK_FORMAT_R16G16B16A16_SFLOAT:
1805                         {
1806                                 deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
1807
1808                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
1809                                 {
1810                                         ptr[k] = deFloat32To16(rnd.getFloat());
1811                                 }
1812                         }
1813                         break;
1814                         case VK_FORMAT_R32_SFLOAT:
1815                         case VK_FORMAT_R32G32_SFLOAT:
1816                         case VK_FORMAT_R32G32B32_SFLOAT:
1817                         case VK_FORMAT_R32G32B32A32_SFLOAT:
1818                         {
1819                                 float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
1820
1821                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
1822                                 {
1823                                         ptr[k] = rnd.getFloat();
1824                                 }
1825                         }
1826                         break;
1827                         case VK_FORMAT_R64_SFLOAT:
1828                         case VK_FORMAT_R64G64_SFLOAT:
1829                         case VK_FORMAT_R64G64B64_SFLOAT:
1830                         case VK_FORMAT_R64G64B64A64_SFLOAT:
1831                         {
1832                                 double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
1833
1834                                 for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
1835                                 {
1836                                         ptr[k] = rnd.getDouble();
1837                                 }
1838                         }
1839                         break;
1840                 }
1841         }
1842         else if (subgroups::SSBOData::InitializeZero == data.initializeType)
1843         {
1844                 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
1845
1846                 for (vk::VkDeviceSize k = 0; k < size / 4; k++)
1847                 {
1848                         ptr[k] = 0;
1849                 }
1850         }
1851
1852         if (subgroups::SSBOData::InitializeNone != data.initializeType)
1853         {
1854                 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
1855         }
1856 }
1857
1858 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
1859 {
1860         switch(shaderStage)
1861         {
1862                 case VK_SHADER_STAGE_VERTEX_BIT:
1863                         return 0u;
1864                         break;
1865                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1866                         return 1u;
1867                         break;
1868                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1869                         return 2u;
1870                         break;
1871                 case VK_SHADER_STAGE_GEOMETRY_BIT:
1872                         return 3u;
1873                         break;
1874                 default:
1875                         DE_ASSERT(0);
1876                         return -1;
1877         }
1878         DE_ASSERT(0);
1879         return -1;
1880 }
1881
1882 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (
1883         Context& context, VkFormat format, SSBOData* extraData,
1884         deUint32 extraDataCount,
1885         bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
1886         const VkShaderStageFlags shaderStage)
1887 {
1888         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
1889         const VkDevice                                                  device                                  = context.getDevice();
1890         const deUint32                                                  maxWidth                                = getMaxWidth();
1891         vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
1892         DescriptorSetLayoutBuilder                              layoutBuilder;
1893         DescriptorPoolBuilder                                   poolBuilder;
1894         DescriptorSetUpdateBuilder                              updateBuilder;
1895         Move <VkDescriptorPool>                                 descriptorPool;
1896         Move <VkDescriptorSet>                                  descriptorSet;
1897
1898         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(vk, device,
1899                                                                                                                                                 context.getBinaryCollection().get("vert"), 0u));
1900         const Unique<VkShaderModule>                    teCtrlShaderModule              (createShaderModule(vk, device,
1901                                                                                                                                                 context.getBinaryCollection().get("tesc"), 0u));
1902         const Unique<VkShaderModule>                    teEvalShaderModule              (createShaderModule(vk, device,
1903                                                                                                                                                 context.getBinaryCollection().get("tese"), 0u));
1904         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(vk, device,
1905                                                                                                                                         context.getBinaryCollection().get("fragment"), 0u));
1906         const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
1907
1908         const VkVertexInputBindingDescription   vertexInputBinding              =
1909         {
1910                 0u,                                                                                     // binding;
1911                 static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
1912                 VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
1913         };
1914
1915         const VkVertexInputAttributeDescription vertexInputAttribute    =
1916         {
1917                 0u,
1918                 0u,
1919                 VK_FORMAT_R32G32B32A32_SFLOAT,
1920                 0u
1921         };
1922
1923         for (deUint32 i = 0u; i < extraDataCount; i++)
1924         {
1925                 if (extraData[i].isImage)
1926                 {
1927                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
1928                 }
1929                 else
1930                 {
1931                         vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
1932                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
1933                 }
1934                 const Allocation& alloc = inputBuffers[i]->getAllocation();
1935                 initializeMemory(context, alloc, extraData[i]);
1936         }
1937
1938         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1939                 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
1940
1941         const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(vk, device));
1942
1943         const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(vk, device, *descriptorSetLayout));
1944
1945         const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
1946                                                                                                                                         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
1947                                                                                                                                         VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
1948                                                                                                                                         *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
1949                                                                                                                                         *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
1950
1951         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1952                 poolBuilder.addType(inputBuffers[ndx]->getType());
1953
1954         if (extraDataCount > 0)
1955         {
1956                 descriptorPool = poolBuilder.build(vk, device,
1957                                                         VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1958                 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
1959         }
1960
1961         for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
1962         {
1963                 if (inputBuffers[buffersNdx]->isImage())
1964                 {
1965                         VkDescriptorImageInfo info =
1966                                 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
1967                                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
1968
1969                         updateBuilder.writeSingle(*descriptorSet,
1970                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1971                                                                                 inputBuffers[buffersNdx]->getType(), &info);
1972                 }
1973                 else
1974                 {
1975                         VkDescriptorBufferInfo info =
1976                                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
1977                                                                                 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
1978
1979                         updateBuilder.writeSingle(*descriptorSet,
1980                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1981                                                                                 inputBuffers[buffersNdx]->getType(), &info);
1982                 }
1983         }
1984
1985         updateBuilder.update(vk, device);
1986
1987         const VkQueue                                                   queue                                   = context.getUniversalQueue();
1988         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
1989         const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(vk, device, queueFamilyIndex));
1990         const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
1991         const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
1992         const vk::VkDeviceSize                                  vertexBufferSize                = 2ull * maxWidth * sizeof(tcu::Vec4);
1993         Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
1994         unsigned                                                                totalIterations                 = 0u;
1995         unsigned                                                                failedIterations                = 0u;
1996         Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1997
1998         {
1999                 const Allocation&               alloc                           = vertexBuffer.getAllocation();
2000                 std::vector<tcu::Vec4>  data                            (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
2001                 const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
2002                 float                                   leftHandPosition        = -1.0f;
2003
2004                 for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
2005                 {
2006                         data[ndx][0] = leftHandPosition;
2007                         leftHandPosition += pixelSize;
2008                         data[ndx+1][0] = leftHandPosition;
2009                 }
2010
2011                 deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
2012                 flushAlloc(vk, device, alloc);
2013         }
2014
2015         const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2016         const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
2017         const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
2018         const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2019         Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2020         const VkDeviceSize                      vertexBufferOffset      = 0u;
2021
2022         for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2023         {
2024                 totalIterations++;
2025
2026                 beginCommandBuffer(vk, *cmdBuffer);
2027                 {
2028
2029                         vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2030                         vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2031
2032                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2033
2034                         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2035
2036                         if (extraDataCount > 0)
2037                         {
2038                                 vk.cmdBindDescriptorSets(*cmdBuffer,
2039                                         VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2040                                         &descriptorSet.get(), 0u, DE_NULL);
2041                         }
2042
2043                         vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2044                         vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
2045
2046                         endRenderPass(vk, *cmdBuffer);
2047
2048                         copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2049                         endCommandBuffer(vk, *cmdBuffer);
2050
2051                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2052                 }
2053
2054                 {
2055                         const Allocation& allocResult = imageBufferResult.getAllocation();
2056                         invalidateAlloc(vk, device, allocResult);
2057
2058                         std::vector<const void*> datas;
2059                         datas.push_back(allocResult.getHostPtr());
2060                         if (!checkResult(datas, width/2u, subgroupSize))
2061                                 failedIterations++;
2062                 }
2063         }
2064
2065         if (0 < failedIterations)
2066         {
2067                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2068
2069                 context.getTestContext().getLog()
2070                                 << TestLog::Message << valuesPassed << " / "
2071                                 << totalIterations << " values passed" << TestLog::EndMessage;
2072                 return tcu::TestStatus::fail("Failed!");
2073         }
2074
2075         return tcu::TestStatus::pass("OK");
2076 }
2077
2078 bool vkt::subgroups::check(std::vector<const void*> datas,
2079         deUint32 width, deUint32 ref)
2080 {
2081         const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2082
2083         for (deUint32 n = 0; n < width; ++n)
2084         {
2085                 if (data[n] != ref)
2086                 {
2087                         return false;
2088                 }
2089         }
2090
2091         return true;
2092 }
2093
2094 bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
2095         const deUint32 numWorkgroups[3], const deUint32 localSize[3],
2096         deUint32 ref)
2097 {
2098         const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2099         const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2100         const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2101
2102         return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2103 }
2104
2105 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
2106         Context& context, VkFormat format, SSBOData* extraData,
2107         deUint32 extraDataCount,
2108         bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2109 {
2110         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
2111         const VkDevice                                                  device                                  = context.getDevice();
2112         const deUint32                                                  maxWidth                                = getMaxWidth();
2113         vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
2114         DescriptorSetLayoutBuilder                              layoutBuilder;
2115         DescriptorPoolBuilder                                   poolBuilder;
2116         DescriptorSetUpdateBuilder                              updateBuilder;
2117         Move <VkDescriptorPool>                                 descriptorPool;
2118         Move <VkDescriptorSet>                                  descriptorSet;
2119
2120         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2121         const Unique<VkShaderModule>                    geometryShaderModule    (createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2122         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2123         const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
2124         const VkVertexInputBindingDescription   vertexInputBinding              =
2125         {
2126                 0u,                                                                                     // binding;
2127                 static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
2128                 VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
2129         };
2130
2131         const VkVertexInputAttributeDescription vertexInputAttribute    =
2132         {
2133                 0u,
2134                 0u,
2135                 VK_FORMAT_R32G32B32A32_SFLOAT,
2136                 0u
2137         };
2138
2139         for (deUint32 i = 0u; i < extraDataCount; i++)
2140         {
2141                 if (extraData[i].isImage)
2142                 {
2143                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2144                 }
2145                 else
2146                 {
2147                         vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2148                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2149                 }
2150                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2151                 initializeMemory(context, alloc, extraData[i]);
2152         }
2153
2154         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2155                 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2156
2157         const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(vk, device));
2158
2159         const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(vk, device, *descriptorSetLayout));
2160
2161         const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
2162                                                                                                                                         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
2163                                                                                                                                         *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
2164                                                                                                                                         *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
2165
2166         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2167                 poolBuilder.addType(inputBuffers[ndx]->getType());
2168
2169         if (extraDataCount > 0)
2170         {
2171                 descriptorPool = poolBuilder.build(vk, device,
2172                                                         VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2173                 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2174         }
2175
2176         for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2177         {
2178                 if (inputBuffers[buffersNdx]->isImage())
2179                 {
2180                         VkDescriptorImageInfo info =
2181                                 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2182                                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2183
2184                         updateBuilder.writeSingle(*descriptorSet,
2185                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2186                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2187                 }
2188                 else
2189                 {
2190                         VkDescriptorBufferInfo info =
2191                                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2192                                                                                 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2193
2194                         updateBuilder.writeSingle(*descriptorSet,
2195                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2196                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2197                 }
2198         }
2199
2200         updateBuilder.update(vk, device);
2201
2202         const VkQueue                                                   queue                                   = context.getUniversalQueue();
2203         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
2204         const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(vk, device, queueFamilyIndex));
2205         const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
2206         const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
2207         const vk::VkDeviceSize                                  vertexBufferSize                = maxWidth * sizeof(tcu::Vec4);
2208         Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2209         unsigned                                                                totalIterations                 = 0u;
2210         unsigned                                                                failedIterations                = 0u;
2211         Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2212
2213         {
2214                 const Allocation&               alloc                           = vertexBuffer.getAllocation();
2215                 std::vector<tcu::Vec4>  data                            (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2216                 const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
2217                 float                                   leftHandPosition        = -1.0f;
2218
2219                 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2220                 {
2221                         data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2222                         leftHandPosition += pixelSize;
2223                 }
2224
2225                 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2226                 flushAlloc(vk, device, alloc);
2227         }
2228
2229         const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2230         const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
2231         const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
2232         const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2233         Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2234         const VkDeviceSize                      vertexBufferOffset      = 0u;
2235
2236         for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2237         {
2238                 totalIterations++;
2239
2240                 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2241                 {
2242                         const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2243                         initializeMemory(context, alloc, extraData[ndx]);
2244                 }
2245
2246                 beginCommandBuffer(vk, *cmdBuffer);
2247                 {
2248                         vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2249
2250                         vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2251
2252                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2253
2254                         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2255
2256                         if (extraDataCount > 0)
2257                         {
2258                                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2259                                         &descriptorSet.get(), 0u, DE_NULL);
2260                         }
2261
2262                         vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2263
2264                         vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2265
2266                         endRenderPass(vk, *cmdBuffer);
2267
2268                         copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2269
2270                         endCommandBuffer(vk, *cmdBuffer);
2271
2272                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2273                 }
2274
2275                 {
2276                         const Allocation& allocResult = imageBufferResult.getAllocation();
2277                         invalidateAlloc(vk, device, allocResult);
2278
2279                         std::vector<const void*> datas;
2280                         datas.push_back(allocResult.getHostPtr());
2281                         if (!checkResult(datas, width, subgroupSize))
2282                                 failedIterations++;
2283                 }
2284         }
2285
2286         if (0 < failedIterations)
2287         {
2288                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2289
2290                 context.getTestContext().getLog()
2291                                 << TestLog::Message << valuesPassed << " / "
2292                                 << totalIterations << " values passed" << TestLog::EndMessage;
2293
2294                 return tcu::TestStatus::fail("Failed!");
2295         }
2296
2297         return tcu::TestStatus::pass("OK");
2298 }
2299
2300
2301 tcu::TestStatus vkt::subgroups::allStages(
2302         Context& context, VkFormat format, SSBOData* extraDatas,
2303         deUint32 extraDatasCount,
2304         bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2305         const VkShaderStageFlags shaderStageTested)
2306 {
2307         const DeviceInterface&                  vk                                      = context.getDeviceInterface();
2308         const VkDevice                                  device                          = context.getDevice();
2309         const deUint32                                  maxWidth                        = getMaxWidth();
2310         vector<VkShaderStageFlagBits>   stagesVector;
2311         VkShaderStageFlags                              shaderStageRequired     = (VkShaderStageFlags)0ull;
2312
2313         Move<VkShaderModule>                    vertexShaderModule;
2314         Move<VkShaderModule>                    teCtrlShaderModule;
2315         Move<VkShaderModule>                    teEvalShaderModule;
2316         Move<VkShaderModule>                    geometryShaderModule;
2317         Move<VkShaderModule>                    fragmentShaderModule;
2318
2319         if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
2320         {
2321                 stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
2322         }
2323         if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
2324         {
2325                 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
2326                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
2327                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
2328         }
2329         if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2330         {
2331                 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
2332                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
2333                 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2334         }
2335         if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
2336         {
2337                 stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
2338                 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
2339                 shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
2340         }
2341         if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
2342         {
2343                 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
2344                 shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
2345         }
2346
2347         const deUint32  stagesCount     = static_cast<deUint32>(stagesVector.size());
2348         const string    vert            = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)                                    ? "vert_noSubgroup"             : "vert";
2349         const string    tesc            = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)              ? "tesc_noSubgroup"             : "tesc";
2350         const string    tese            = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)   ? "tese_noSubgroup"             : "tese";
2351
2352         shaderStageRequired = shaderStageTested | shaderStageRequired;
2353
2354         vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
2355         if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
2356         {
2357                 teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
2358                 teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
2359         }
2360         if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
2361         {
2362                 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2363                 {
2364                         // tessellation shaders output line primitives
2365                         geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
2366                 }
2367                 else
2368                 {
2369                         // otherwise points are processed by geometry shader
2370                         geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
2371                 }
2372         }
2373         if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
2374                 fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
2375
2376         std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
2377
2378         DescriptorSetLayoutBuilder layoutBuilder;
2379         // The implicit result SSBO we use to store our outputs from the shader
2380         for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
2381         {
2382                 const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
2383                 const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
2384                 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2385
2386                 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
2387         }
2388
2389         for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
2390         {
2391                 const deUint32 datasNdx = ndx - stagesCount;
2392                 if (extraDatas[datasNdx].isImage)
2393                 {
2394                         inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
2395                 }
2396                 else
2397                 {
2398                         const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
2399                         inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2400                 }
2401
2402                 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2403                 initializeMemory(context, alloc, extraDatas[datasNdx]);
2404
2405                 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
2406                                                                 extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
2407         }
2408
2409         const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
2410
2411         const Unique<VkPipelineLayout> pipelineLayout(
2412                 makePipelineLayout(vk, device, *descriptorSetLayout));
2413
2414         const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2415         const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
2416                                                                                 shaderStageRequired,
2417                                                                                 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
2418                                                                                 *renderPass,
2419                                                                                 (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
2420
2421         Move <VkDescriptorPool> descriptorPool;
2422         Move <VkDescriptorSet>  descriptorSet;
2423
2424         if (inputBuffers.size() > 0)
2425         {
2426                 DescriptorPoolBuilder poolBuilder;
2427
2428                 for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
2429                 {
2430                         poolBuilder.addType(inputBuffers[ndx]->getType());
2431                 }
2432
2433                 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2434
2435                 // Create descriptor set
2436                 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2437
2438                 DescriptorSetUpdateBuilder updateBuilder;
2439
2440                 for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
2441                 {
2442                         deUint32 binding;
2443                         if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
2444                         else binding = extraDatas[ndx -stagesCount].binding;
2445
2446                         if (inputBuffers[ndx]->isImage())
2447                         {
2448                                 VkDescriptorImageInfo info =
2449                                         makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
2450                                                                                         inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2451
2452                                 updateBuilder.writeSingle(      *descriptorSet,
2453                                                                                         DescriptorSetUpdateBuilder::Location::binding(binding),
2454                                                                                         inputBuffers[ndx]->getType(), &info);
2455                         }
2456                         else
2457                         {
2458                                 VkDescriptorBufferInfo info =
2459                                         makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
2460                                                         0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
2461
2462                                 updateBuilder.writeSingle(      *descriptorSet,
2463                                                                                                         DescriptorSetUpdateBuilder::Location::binding(binding),
2464                                                                                                         inputBuffers[ndx]->getType(), &info);
2465                         }
2466                 }
2467
2468                 updateBuilder.update(vk, device);
2469         }
2470
2471         {
2472                 const VkQueue                                   queue                                   = context.getUniversalQueue();
2473                 const deUint32                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
2474                 const Unique<VkCommandPool>             cmdPool                                 (makeCommandPool(vk, device, queueFamilyIndex));
2475                 const deUint32                                  subgroupSize                    = getSubgroupSize(context);
2476                 const Unique<VkCommandBuffer>   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
2477                 unsigned                                                totalIterations                 = 0u;
2478                 unsigned                                                failedIterations                = 0u;
2479                 Image                                                   resultImage                             (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2480                 const Unique<VkFramebuffer>             framebuffer                             (makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
2481                 const VkViewport                                viewport                                = makeViewport(maxWidth, 1u);
2482                 const VkRect2D                                  scissor                                 = makeRect2D(maxWidth, 1u);
2483                 const vk::VkDeviceSize                  imageResultSize                 = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2484                 Buffer                                                  imageBufferResult               (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2485                 const VkImageSubresourceRange   subresourceRange                =
2486                 {
2487                         VK_IMAGE_ASPECT_COLOR_BIT,                                                                                      //VkImageAspectFlags    aspectMask
2488                         0u,                                                                                                                                     //deUint32                              baseMipLevel
2489                         1u,                                                                                                                                     //deUint32                              levelCount
2490                         0u,                                                                                                                                     //deUint32                              baseArrayLayer
2491                         1u                                                                                                                                      //deUint32                              layerCount
2492                 };
2493
2494                 const VkImageMemoryBarrier              colorAttachmentBarrier  = makeImageMemoryBarrier(
2495                         (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
2496                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2497                         resultImage.getImage(), subresourceRange);
2498
2499                 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2500                 {
2501                         for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
2502                         {
2503                                 // re-init the data
2504                                 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2505                                 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
2506                         }
2507
2508                         totalIterations++;
2509
2510                         beginCommandBuffer(vk, *cmdBuffer);
2511
2512                         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
2513
2514                         vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2515
2516                         vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2517
2518                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2519
2520                         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2521
2522                         if (stagesCount + extraDatasCount > 0)
2523                                 vk.cmdBindDescriptorSets(*cmdBuffer,
2524                                                 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2525                                                 &descriptorSet.get(), 0u, DE_NULL);
2526
2527                         vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
2528
2529                         endRenderPass(vk, *cmdBuffer);
2530
2531                         copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2532
2533                         endCommandBuffer(vk, *cmdBuffer);
2534
2535                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2536
2537                         for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
2538                         {
2539                                 std::vector<const void*> datas;
2540                                 if (!inputBuffers[ndx]->isImage())
2541                                 {
2542                                         const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
2543                                         invalidateAlloc(vk, device, resultAlloc);
2544                                         // we always have our result data first
2545                                         datas.push_back(resultAlloc.getHostPtr());
2546                                 }
2547
2548                                 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2549                                 {
2550                                         const deUint32 datasNdx = index - stagesCount;
2551                                         if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
2552                                         {
2553                                                 const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
2554                                                 invalidateAlloc(vk, device, resultAlloc);
2555                                                 // we always have our result data first
2556                                                 datas.push_back(resultAlloc.getHostPtr());
2557                                         }
2558                                 }
2559
2560                                 if (!checkResult(datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
2561                                         failedIterations++;
2562                         }
2563                         if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
2564                         {
2565                                 std::vector<const void*> datas;
2566                                 const Allocation& resultAlloc = imageBufferResult.getAllocation();
2567                                 invalidateAlloc(vk, device, resultAlloc);
2568
2569                                 // we always have our result data first
2570                                 datas.push_back(resultAlloc.getHostPtr());
2571
2572                                 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2573                                 {
2574                                         const deUint32 datasNdx = index - stagesCount;
2575                                         if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
2576                                         {
2577                                                 const Allocation& alloc = inputBuffers[index]->getAllocation();
2578                                                 invalidateAlloc(vk, device, alloc);
2579                                                 // we always have our result data first
2580                                                 datas.push_back(alloc.getHostPtr());
2581                                         }
2582                                 }
2583
2584                                 if (!checkResult(datas, width, subgroupSize))
2585                                         failedIterations++;
2586                         }
2587
2588                         vk.resetCommandBuffer(*cmdBuffer, 0);
2589                 }
2590
2591                 if (0 < failedIterations)
2592                 {
2593                         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2594
2595                         context.getTestContext().getLog()
2596                                 << TestLog::Message << valuesPassed << " / "
2597                                 << totalIterations << " values passed" << TestLog::EndMessage;
2598
2599                         return tcu::TestStatus::fail("Failed!");
2600                 }
2601         }
2602
2603         return tcu::TestStatus::pass("OK");
2604 }
2605
2606 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
2607         SSBOData* extraData, deUint32 extraDataCount,
2608         bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2609 {
2610         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
2611         const VkDevice                                                  device                                  = context.getDevice();
2612         const VkQueue                                                   queue                                   = context.getUniversalQueue();
2613         const deUint32                                                  maxWidth                                = getMaxWidth();
2614         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
2615         vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
2616         DescriptorSetLayoutBuilder                              layoutBuilder;
2617         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2618         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2619         const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
2620
2621         const VkVertexInputBindingDescription   vertexInputBinding              =
2622         {
2623                 0u,                                                                                     // binding;
2624                 static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
2625                 VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
2626         };
2627
2628         const VkVertexInputAttributeDescription vertexInputAttribute    =
2629         {
2630                 0u,
2631                 0u,
2632                 VK_FORMAT_R32G32B32A32_SFLOAT,
2633                 0u
2634         };
2635
2636         for (deUint32 i = 0u; i < extraDataCount; i++)
2637         {
2638                 if (extraData[i].isImage)
2639                 {
2640                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2641                 }
2642                 else
2643                 {
2644                         vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2645                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2646                 }
2647                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2648                 initializeMemory(context, alloc, extraData[i]);
2649         }
2650
2651         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2652                 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
2653
2654         const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(vk, device));
2655
2656         const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(vk, device, *descriptorSetLayout));
2657
2658         const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
2659                                                                                                                                                 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
2660                                                                                                                                                 *vertexShaderModule, *fragmentShaderModule,
2661                                                                                                                                                 DE_NULL, DE_NULL, DE_NULL,
2662                                                                                                                                                 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
2663                                                                                                                                                 &vertexInputBinding, &vertexInputAttribute, true, format));
2664         DescriptorPoolBuilder                                   poolBuilder;
2665         DescriptorSetUpdateBuilder                              updateBuilder;
2666
2667
2668         for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2669                 poolBuilder.addType(inputBuffers[ndx]->getType());
2670
2671         Move <VkDescriptorPool>                                 descriptorPool;
2672         Move <VkDescriptorSet>                                  descriptorSet;
2673
2674         if (extraDataCount > 0)
2675         {
2676                 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2677                 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2678         }
2679
2680         for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2681         {
2682                 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2683                 initializeMemory(context, alloc, extraData[ndx]);
2684         }
2685
2686         for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2687         {
2688                 if (inputBuffers[buffersNdx]->isImage())
2689                 {
2690                         VkDescriptorImageInfo info =
2691                                 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2692                                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2693
2694                         updateBuilder.writeSingle(*descriptorSet,
2695                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2696                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2697                 }
2698                 else
2699                 {
2700                         VkDescriptorBufferInfo info =
2701                                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2702                                                                                 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2703
2704                         updateBuilder.writeSingle(*descriptorSet,
2705                                                                                 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2706                                                                                 inputBuffers[buffersNdx]->getType(), &info);
2707                 }
2708         }
2709         updateBuilder.update(vk, device);
2710
2711         const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(vk, device, queueFamilyIndex));
2712
2713         const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
2714
2715         const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
2716
2717         const vk::VkDeviceSize                                  vertexBufferSize                = maxWidth * sizeof(tcu::Vec4);
2718         Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2719
2720         unsigned                                                                totalIterations                 = 0u;
2721         unsigned                                                                failedIterations                = 0u;
2722
2723         Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2724
2725         {
2726                 const Allocation&               alloc                           = vertexBuffer.getAllocation();
2727                 std::vector<tcu::Vec4>  data                            (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2728                 const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
2729                 float                                   leftHandPosition        = -1.0f;
2730
2731                 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2732                 {
2733                         data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2734                         leftHandPosition += pixelSize;
2735                 }
2736
2737                 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2738                 flushAlloc(vk, device, alloc);
2739         }
2740
2741         const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2742         const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
2743         const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
2744         const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2745         Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2746         const VkDeviceSize                      vertexBufferOffset      = 0u;
2747
2748         for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2749         {
2750                 totalIterations++;
2751
2752                 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2753                 {
2754                         const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2755                         initializeMemory(context, alloc, extraData[ndx]);
2756                 }
2757
2758                 beginCommandBuffer(vk, *cmdBuffer);
2759                 {
2760                         vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2761
2762                         vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2763
2764                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2765
2766                         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2767
2768                         if (extraDataCount > 0)
2769                         {
2770                                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2771                                         &descriptorSet.get(), 0u, DE_NULL);
2772                         }
2773
2774                         vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2775
2776                         vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2777
2778                         endRenderPass(vk, *cmdBuffer);
2779
2780                         copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2781
2782                         endCommandBuffer(vk, *cmdBuffer);
2783
2784                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2785                 }
2786
2787                 {
2788                         const Allocation& allocResult = imageBufferResult.getAllocation();
2789                         invalidateAlloc(vk, device, allocResult);
2790
2791                         std::vector<const void*> datas;
2792                         datas.push_back(allocResult.getHostPtr());
2793                         if (!checkResult(datas, width, subgroupSize))
2794                                 failedIterations++;
2795                 }
2796         }
2797
2798         if (0 < failedIterations)
2799         {
2800                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2801
2802                 context.getTestContext().getLog()
2803                         << TestLog::Message << valuesPassed << " / "
2804                         << totalIterations << " values passed" << TestLog::EndMessage;
2805
2806                 return tcu::TestStatus::fail("Failed!");
2807         }
2808
2809         return tcu::TestStatus::pass("OK");
2810 }
2811
2812
2813 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest     (Context& context, VkFormat format, SSBOData* extraDatas,
2814         deUint32 extraDatasCount,
2815         bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
2816                                                 deUint32 height, deUint32 subgroupSize))
2817 {
2818         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
2819         const VkDevice                                                  device                                  = context.getDevice();
2820         const VkQueue                                                   queue                                   = context.getUniversalQueue();
2821         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
2822         const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule
2823                                                                                                                                                 (vk, device, context.getBinaryCollection().get("vert"), 0u));
2824         const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule
2825                                                                                                                                                 (vk, device, context.getBinaryCollection().get("fragment"), 0u));
2826
2827         std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
2828
2829         for (deUint32 i = 0; i < extraDatasCount; i++)
2830         {
2831                 if (extraDatas[i].isImage)
2832                 {
2833                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
2834                                                                                 static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
2835                 }
2836                 else
2837                 {
2838                         vk::VkDeviceSize size =
2839                                 getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
2840                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2841                 }
2842
2843                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2844                 initializeMemory(context, alloc, extraDatas[i]);
2845         }
2846
2847         DescriptorSetLayoutBuilder layoutBuilder;
2848
2849         for (deUint32 i = 0; i < extraDatasCount; i++)
2850         {
2851                 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
2852                                                                  VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
2853         }
2854
2855         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2856                 layoutBuilder.build(vk, device));
2857
2858         const Unique<VkPipelineLayout> pipelineLayout(
2859                 makePipelineLayout(vk, device, *descriptorSetLayout));
2860
2861         const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2862         const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
2863                                                                           VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
2864                                                                           *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
2865                                                                           DE_NULL, DE_NULL, true));
2866
2867         DescriptorPoolBuilder poolBuilder;
2868
2869         // To stop validation complaining, always add at least one type to pool.
2870         poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2871         for (deUint32 i = 0; i < extraDatasCount; i++)
2872         {
2873                 poolBuilder.addType(inputBuffers[i]->getType());
2874         }
2875
2876         Move<VkDescriptorPool> descriptorPool;
2877         // Create descriptor set
2878         Move<VkDescriptorSet> descriptorSet;
2879
2880         if (extraDatasCount > 0)
2881         {
2882                 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2883
2884                 descriptorSet   = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2885         }
2886
2887         DescriptorSetUpdateBuilder updateBuilder;
2888
2889         for (deUint32 i = 0; i < extraDatasCount; i++)
2890         {
2891                 if (inputBuffers[i]->isImage())
2892                 {
2893                         VkDescriptorImageInfo info =
2894                                 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
2895                                                                                 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2896
2897                         updateBuilder.writeSingle(*descriptorSet,
2898                                                                           DescriptorSetUpdateBuilder::Location::binding(i),
2899                                                                           inputBuffers[i]->getType(), &info);
2900                 }
2901                 else
2902                 {
2903                         VkDescriptorBufferInfo info =
2904                                 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
2905                                                                                  0ull, inputBuffers[i]->getAsBuffer()->getSize());
2906
2907                         updateBuilder.writeSingle(*descriptorSet,
2908                                                                           DescriptorSetUpdateBuilder::Location::binding(i),
2909                                                                           inputBuffers[i]->getType(), &info);
2910                 }
2911         }
2912
2913         if (extraDatasCount > 0)
2914                 updateBuilder.update(vk, device);
2915
2916         const Unique<VkCommandPool>             cmdPool                         (makeCommandPool(vk, device, queueFamilyIndex));
2917
2918         const deUint32                                  subgroupSize            = getSubgroupSize(context);
2919
2920         const Unique<VkCommandBuffer>   cmdBuffer                       (makeCommandBuffer(context, *cmdPool));
2921
2922         unsigned totalIterations = 0;
2923         unsigned failedIterations = 0;
2924
2925         for (deUint32 width = 8; width <= subgroupSize; width *= 2)
2926         {
2927                 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
2928                 {
2929                         totalIterations++;
2930
2931                         // re-init the data
2932                         for (deUint32 i = 0; i < extraDatasCount; i++)
2933                         {
2934                                 const Allocation& alloc = inputBuffers[i]->getAllocation();
2935                                 initializeMemory(context, alloc, extraDatas[i]);
2936                         }
2937
2938                         VkDeviceSize formatSize = getFormatSizeInBytes(format);
2939                         const VkDeviceSize resultImageSizeInBytes =
2940                                 width * height * formatSize;
2941
2942                         Image resultImage(context, width, height, format,
2943                                                           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
2944                                                           VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2945
2946                         Buffer resultBuffer(context, resultImageSizeInBytes,
2947                                                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT);
2948
2949                         const Unique<VkFramebuffer> framebuffer(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
2950
2951                         beginCommandBuffer(vk, *cmdBuffer);
2952
2953                         VkViewport viewport = makeViewport(width, height);
2954
2955                         vk.cmdSetViewport(
2956                                 *cmdBuffer, 0, 1, &viewport);
2957
2958                         VkRect2D scissor = {{0, 0}, {width, height}};
2959
2960                         vk.cmdSetScissor(
2961                                 *cmdBuffer, 0, 1, &scissor);
2962
2963                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
2964
2965                         vk.cmdBindPipeline(
2966                                 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2967
2968                         if (extraDatasCount > 0)
2969                         {
2970                                 vk.cmdBindDescriptorSets(*cmdBuffer,
2971                                                 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2972                                                 &descriptorSet.get(), 0u, DE_NULL);
2973                         }
2974
2975                         vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
2976
2977                         endRenderPass(vk, *cmdBuffer);
2978
2979                         copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2980
2981                         endCommandBuffer(vk, *cmdBuffer);
2982
2983                         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2984
2985                         std::vector<const void*> datas;
2986                         {
2987                                 const Allocation& resultAlloc = resultBuffer.getAllocation();
2988                                 invalidateAlloc(vk, device, resultAlloc);
2989
2990                                 // we always have our result data first
2991                                 datas.push_back(resultAlloc.getHostPtr());
2992                         }
2993
2994                         if (!checkResult(datas, width, height, subgroupSize))
2995                         {
2996                                 failedIterations++;
2997                         }
2998
2999                         vk.resetCommandBuffer(*cmdBuffer, 0);
3000                 }
3001         }
3002
3003         if (0 < failedIterations)
3004         {
3005                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3006
3007                 context.getTestContext().getLog()
3008                         << TestLog::Message << valuesPassed << " / "
3009                         << totalIterations << " values passed" << TestLog::EndMessage;
3010
3011                 return tcu::TestStatus::fail("Failed!");
3012         }
3013
3014         return tcu::TestStatus::pass("OK");
3015 }
3016
3017 tcu::TestStatus vkt::subgroups::makeComputeTest(
3018         Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
3019         bool (*checkResult)(std::vector<const void*> datas,
3020                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
3021                                                 deUint32 subgroupSize))
3022 {
3023         const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
3024         const VkDevice                                                  device                                  = context.getDevice();
3025         const VkQueue                                                   queue                                   = context.getUniversalQueue();
3026         const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
3027         VkDeviceSize                                                    elementSize                             = getFormatSizeInBytes(format);
3028
3029         const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
3030                                                                                   maxSupportedSubgroupSize() *
3031                                                                                   maxSupportedSubgroupSize();
3032         const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
3033
3034         Buffer resultBuffer(
3035                 context, resultBufferSizeInBytes);
3036
3037         std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
3038
3039         for (deUint32 i = 0; i < inputsCount; i++)
3040         {
3041                 if (inputs[i].isImage)
3042                 {
3043                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3044                                                                                 static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
3045                 }
3046                 else
3047                 {
3048                         vk::VkDeviceSize size =
3049                                 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3050                         inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3051                 }
3052
3053                 const Allocation& alloc = inputBuffers[i]->getAllocation();
3054                 initializeMemory(context, alloc, inputs[i]);
3055         }
3056
3057         DescriptorSetLayoutBuilder layoutBuilder;
3058         layoutBuilder.addBinding(
3059                 resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3060
3061         for (deUint32 i = 0; i < inputsCount; i++)
3062         {
3063                 layoutBuilder.addBinding(
3064                         inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3065         }
3066
3067         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3068                 layoutBuilder.build(vk, device));
3069
3070         const Unique<VkShaderModule> shaderModule(
3071                 createShaderModule(vk, device,
3072                                                    context.getBinaryCollection().get("comp"), 0u));
3073         const Unique<VkPipelineLayout> pipelineLayout(
3074                 makePipelineLayout(vk, device, *descriptorSetLayout));
3075
3076         DescriptorPoolBuilder poolBuilder;
3077
3078         poolBuilder.addType(resultBuffer.getType());
3079
3080         for (deUint32 i = 0; i < inputsCount; i++)
3081         {
3082                 poolBuilder.addType(inputBuffers[i]->getType());
3083         }
3084
3085         const Unique<VkDescriptorPool> descriptorPool(
3086                 poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3087
3088         // Create descriptor set
3089         const Unique<VkDescriptorSet> descriptorSet(
3090                 makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
3091
3092         DescriptorSetUpdateBuilder updateBuilder;
3093
3094         const VkDescriptorBufferInfo resultDescriptorInfo =
3095                 makeDescriptorBufferInfo(
3096                         resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
3097
3098         updateBuilder.writeSingle(*descriptorSet,
3099                                                           DescriptorSetUpdateBuilder::Location::binding(0u),
3100                                                           VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
3101
3102         for (deUint32 i = 0; i < inputsCount; i++)
3103         {
3104                 if (inputBuffers[i]->isImage())
3105                 {
3106                         VkDescriptorImageInfo info =
3107                                 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3108                                                                                 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3109
3110                         updateBuilder.writeSingle(*descriptorSet,
3111                                                                           DescriptorSetUpdateBuilder::Location::binding(i + 1),
3112                                                                           inputBuffers[i]->getType(), &info);
3113                 }
3114                 else
3115                 {
3116                         vk::VkDeviceSize size =
3117                                 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3118                         VkDescriptorBufferInfo info =
3119                                 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
3120
3121                         updateBuilder.writeSingle(*descriptorSet,
3122                                                                           DescriptorSetUpdateBuilder::Location::binding(i + 1),
3123                                                                           inputBuffers[i]->getType(), &info);
3124                 }
3125         }
3126
3127         updateBuilder.update(vk, device);
3128
3129         const Unique<VkCommandPool>             cmdPool                         (makeCommandPool(vk, device, queueFamilyIndex));
3130
3131         unsigned totalIterations = 0;
3132         unsigned failedIterations = 0;
3133
3134         const deUint32 subgroupSize = getSubgroupSize(context);
3135
3136         const Unique<VkCommandBuffer> cmdBuffer(
3137                 makeCommandBuffer(context, *cmdPool));
3138
3139         const deUint32 numWorkgroups[3] = {4, 2, 2};
3140
3141         const deUint32 localSizesToTestCount = 15;
3142         deUint32 localSizesToTest[localSizesToTestCount][3] =
3143         {
3144                 {1, 1, 1},
3145                 {32, 4, 1},
3146                 {32, 1, 4},
3147                 {1, 32, 4},
3148                 {1, 4, 32},
3149                 {4, 1, 32},
3150                 {4, 32, 1},
3151                 {subgroupSize, 1, 1},
3152                 {1, subgroupSize, 1},
3153                 {1, 1, subgroupSize},
3154                 {3, 5, 7},
3155                 {128, 1, 1},
3156                 {1, 128, 1},
3157                 {1, 1, 64},
3158                 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
3159         };
3160
3161         Move<VkPipeline> lastPipeline(
3162                 makeComputePipeline(context, *pipelineLayout, *shaderModule,
3163                                                         localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]));
3164
3165         for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
3166         {
3167                 const deUint32 nextX = localSizesToTest[index + 1][0];
3168                 const deUint32 nextY = localSizesToTest[index + 1][1];
3169                 const deUint32 nextZ = localSizesToTest[index + 1][2];
3170
3171                 // we are running one test
3172                 totalIterations++;
3173
3174                 beginCommandBuffer(vk, *cmdBuffer);
3175
3176                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *lastPipeline);
3177
3178                 vk.cmdBindDescriptorSets(*cmdBuffer,
3179                                 VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
3180                                 &descriptorSet.get(), 0u, DE_NULL);
3181
3182                 vk.cmdDispatch(*cmdBuffer,numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
3183
3184                 endCommandBuffer(vk, *cmdBuffer);
3185
3186                 Move<VkPipeline> nextPipeline(
3187                         makeComputePipeline(context, *pipelineLayout, *shaderModule,
3188                                                                 nextX, nextY, nextZ));
3189
3190                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3191
3192                 std::vector<const void*> datas;
3193
3194                 {
3195                         const Allocation& resultAlloc = resultBuffer.getAllocation();
3196                         invalidateAlloc(vk, device, resultAlloc);
3197
3198                         // we always have our result data first
3199                         datas.push_back(resultAlloc.getHostPtr());
3200                 }
3201
3202                 for (deUint32 i = 0; i < inputsCount; i++)
3203                 {
3204                         if (!inputBuffers[i]->isImage())
3205                         {
3206                                 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
3207                                 invalidateAlloc(vk, device, resultAlloc);
3208
3209                                 // we always have our result data first
3210                                 datas.push_back(resultAlloc.getHostPtr());
3211                         }
3212                 }
3213
3214                 if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
3215                 {
3216                         failedIterations++;
3217                 }
3218
3219                 vk.resetCommandBuffer(*cmdBuffer, 0);
3220
3221                 lastPipeline = nextPipeline;
3222         }
3223
3224         if (0 < failedIterations)
3225         {
3226                 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3227
3228                 context.getTestContext().getLog()
3229                         << TestLog::Message << valuesPassed << " / "
3230                         << totalIterations << " values passed" << TestLog::EndMessage;
3231
3232                 return tcu::TestStatus::fail("Failed!");
3233         }
3234
3235         return tcu::TestStatus::pass("OK");
3236 }