1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Google Inc.
7 * Copyright (c) 2017 Codeplay Software Ltd.
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
23 * \brief Subgroups Tests Utils
24 */ /*--------------------------------------------------------------------*/
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "deFloat16.h"
28 #include "deRandom.hpp"
29 #include "tcuCommandLine.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "vkBarrierUtil.hpp"
32 #include "vkImageUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkObjUtil.hpp"
44 deUint32 getMaxWidth ()
49 deUint32 getNextWidth (const deUint32 width)
53 // This ensures we test every value up to 128 (the max subgroup size).
58 // And once we hit 128 we increment to only power of 2's to reduce testing time.
63 deUint32 getFormatSizeInBytes(const VkFormat format)
68 DE_FATAL("Unhandled format!");
70 case VK_FORMAT_R8_SINT:
71 case VK_FORMAT_R8_UINT:
72 return static_cast<deUint32>(sizeof(deInt8));
73 case VK_FORMAT_R8G8_SINT:
74 case VK_FORMAT_R8G8_UINT:
75 return static_cast<deUint32>(sizeof(deInt8) * 2);
76 case VK_FORMAT_R8G8B8_SINT:
77 case VK_FORMAT_R8G8B8_UINT:
78 case VK_FORMAT_R8G8B8A8_SINT:
79 case VK_FORMAT_R8G8B8A8_UINT:
80 return static_cast<deUint32>(sizeof(deInt8) * 4);
81 case VK_FORMAT_R16_SINT:
82 case VK_FORMAT_R16_UINT:
83 case VK_FORMAT_R16_SFLOAT:
84 return static_cast<deUint32>(sizeof(deInt16));
85 case VK_FORMAT_R16G16_SINT:
86 case VK_FORMAT_R16G16_UINT:
87 case VK_FORMAT_R16G16_SFLOAT:
88 return static_cast<deUint32>(sizeof(deInt16) * 2);
89 case VK_FORMAT_R16G16B16_UINT:
90 case VK_FORMAT_R16G16B16_SINT:
91 case VK_FORMAT_R16G16B16_SFLOAT:
92 case VK_FORMAT_R16G16B16A16_SINT:
93 case VK_FORMAT_R16G16B16A16_UINT:
94 case VK_FORMAT_R16G16B16A16_SFLOAT:
95 return static_cast<deUint32>(sizeof(deInt16) * 4);
96 case VK_FORMAT_R32_SINT:
97 case VK_FORMAT_R32_UINT:
98 case VK_FORMAT_R32_SFLOAT:
99 return static_cast<deUint32>(sizeof(deInt32));
100 case VK_FORMAT_R32G32_SINT:
101 case VK_FORMAT_R32G32_UINT:
102 case VK_FORMAT_R32G32_SFLOAT:
103 return static_cast<deUint32>(sizeof(deInt32) * 2);
104 case VK_FORMAT_R32G32B32_SINT:
105 case VK_FORMAT_R32G32B32_UINT:
106 case VK_FORMAT_R32G32B32_SFLOAT:
107 case VK_FORMAT_R32G32B32A32_SINT:
108 case VK_FORMAT_R32G32B32A32_UINT:
109 case VK_FORMAT_R32G32B32A32_SFLOAT:
110 return static_cast<deUint32>(sizeof(deInt32) * 4);
111 case VK_FORMAT_R64_SINT:
112 case VK_FORMAT_R64_UINT:
113 case VK_FORMAT_R64_SFLOAT:
114 return static_cast<deUint32>(sizeof(deInt64));
115 case VK_FORMAT_R64G64_SINT:
116 case VK_FORMAT_R64G64_UINT:
117 case VK_FORMAT_R64G64_SFLOAT:
118 return static_cast<deUint32>(sizeof(deInt64) * 2);
119 case VK_FORMAT_R64G64B64_SINT:
120 case VK_FORMAT_R64G64B64_UINT:
121 case VK_FORMAT_R64G64B64_SFLOAT:
122 case VK_FORMAT_R64G64B64A64_SINT:
123 case VK_FORMAT_R64G64B64A64_UINT:
124 case VK_FORMAT_R64G64B64A64_SFLOAT:
125 return static_cast<deUint32>(sizeof(deInt64) * 4);
126 // The below formats are used to represent bool and bvec* types. These
127 // types are passed to the shader as int and ivec* types, before the
128 // calculations are done as booleans. We need a distinct type here so
129 // that the shader generators can switch on it and generate the correct
130 // shader source for testing.
131 case VK_FORMAT_R8_USCALED:
132 return static_cast<deUint32>(sizeof(deInt32));
133 case VK_FORMAT_R8G8_USCALED:
134 return static_cast<deUint32>(sizeof(deInt32) * 2);
135 case VK_FORMAT_R8G8B8_USCALED:
136 case VK_FORMAT_R8G8B8A8_USCALED:
137 return static_cast<deUint32>(sizeof(deInt32) * 4);
141 deUint32 getElementSizeInBytes(
142 const VkFormat format,
143 const subgroups::SSBOData::InputDataLayoutType layout)
145 deUint32 bytes = getFormatSizeInBytes(format);
146 if (layout == subgroups::SSBOData::LayoutStd140)
147 return bytes < 16 ? 16 : bytes;
152 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
154 VkAttachmentReference colorReference = {
155 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
158 const VkSubpassDescription subpassDescription = {0u,
159 VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
160 DE_NULL, DE_NULL, 0, DE_NULL
163 const VkSubpassDependency subpassDependencies[2] = {
164 { VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
165 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
166 VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
167 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
168 VK_DEPENDENCY_BY_REGION_BIT
170 { 0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
171 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
172 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
173 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
174 VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
178 VkAttachmentDescription attachmentDescription = {0u, format,
179 VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
180 VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
181 VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
182 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
185 const VkRenderPassCreateInfo renderPassCreateInfo = {
186 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
187 &attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
190 return createRenderPass(context.getDeviceInterface(), context.getDevice(),
191 &renderPassCreateInfo);
194 Move<VkPipeline> makeGraphicsPipeline(Context& context,
195 const VkPipelineLayout pipelineLayout,
196 const VkShaderStageFlags stages,
197 const VkShaderModule vertexShaderModule,
198 const VkShaderModule fragmentShaderModule,
199 const VkShaderModule geometryShaderModule,
200 const VkShaderModule tessellationControlModule,
201 const VkShaderModule tessellationEvaluationModule,
202 const VkRenderPass renderPass,
203 const VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
204 const VkVertexInputBindingDescription* vertexInputBindingDescription = DE_NULL,
205 const VkVertexInputAttributeDescription* vertexInputAttributeDescriptions = DE_NULL,
206 const bool frameBufferTests = false,
207 const vk::VkFormat attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT)
209 std::vector<VkViewport> noViewports;
210 std::vector<VkRect2D> noScissors;
212 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
214 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
215 DE_NULL, // const void* pNext;
216 0u, // VkPipelineVertexInputStateCreateFlags flags;
217 vertexInputBindingDescription == DE_NULL ? 0u : 1u, // deUint32 vertexBindingDescriptionCount;
218 vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
219 vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u, // deUint32 vertexAttributeDescriptionCount;
220 vertexInputAttributeDescriptions, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
223 const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
224 const VkColorComponentFlags colorComponent =
225 numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
226 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
227 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
228 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
230 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
232 VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
233 VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
237 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
239 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
240 VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
241 { 0.0f, 0.0f, 0.0f, 0.0f }
244 const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
246 return vk::makeGraphicsPipeline(context.getDeviceInterface(), // const DeviceInterface& vk
247 context.getDevice(), // const VkDevice device
248 pipelineLayout, // const VkPipelineLayout pipelineLayout
249 vertexShaderModule, // const VkShaderModule vertexShaderModule
250 tessellationControlModule, // const VkShaderModule tessellationControlShaderModule
251 tessellationEvaluationModule, // const VkShaderModule tessellationEvalShaderModule
252 geometryShaderModule, // const VkShaderModule geometryShaderModule
253 fragmentShaderModule, // const VkShaderModule fragmentShaderModule
254 renderPass, // const VkRenderPass renderPass
255 noViewports, // const std::vector<VkViewport>& viewports
256 noScissors, // const std::vector<VkRect2D>& scissors
257 topology, // const VkPrimitiveTopology topology
258 0u, // const deUint32 subpass
259 patchControlPoints, // const deUint32 patchControlPoints
260 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
261 DE_NULL, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
262 DE_NULL, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
263 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
264 &colorBlendStateCreateInfo); // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
267 Move<VkPipeline> makeComputePipeline(Context& context,
268 const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
269 const deUint32 pipelineCreateFlags, VkPipeline basePipelineHandle,
270 deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
272 const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
274 const vk::VkSpecializationMapEntry entries[3] =
276 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
277 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
278 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
281 const vk::VkSpecializationInfo info =
283 /* mapEntryCount = */ 3,
284 /* pMapEntries = */ entries,
285 /* dataSize = */ sizeof(localSize),
286 /* pData = */ localSize
289 const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
291 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
292 DE_NULL, // const void* pNext;
293 0u, // VkPipelineShaderStageCreateFlags flags;
294 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
295 shaderModule, // VkShaderModule module;
296 "main", // const char* pName;
297 &info, // const VkSpecializationInfo* pSpecializationInfo;
300 const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
302 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
303 DE_NULL, // const void* pNext;
304 pipelineCreateFlags, // VkPipelineCreateFlags flags;
305 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
306 pipelineLayout, // VkPipelineLayout layout;
307 basePipelineHandle, // VkPipeline basePipelineHandle;
308 -1, // deInt32 basePipelineIndex;
311 return createComputePipeline(context.getDeviceInterface(),
312 context.getDevice(), DE_NULL, &pipelineCreateInfo);
315 Move<VkCommandBuffer> makeCommandBuffer(
316 Context& context, const VkCommandPool commandPool)
318 const VkCommandBufferAllocateInfo bufferAllocateParams =
320 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
321 DE_NULL, // const void* pNext;
322 commandPool, // VkCommandPool commandPool;
323 VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
324 1u, // deUint32 bufferCount;
326 return allocateCommandBuffer(context.getDeviceInterface(),
327 context.getDevice(), &bufferAllocateParams);
340 Buffer* getAsBuffer()
342 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
343 return reinterpret_cast<Buffer* >(this);
348 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
349 return reinterpret_cast<Image*>(this);
352 virtual VkDescriptorType getType() const
356 return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
360 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
364 Allocation& getAllocation() const
366 return *m_allocation;
369 virtual ~BufferOrImage() {}
372 explicit BufferOrImage(bool image) : m_isImage(image) {}
375 de::details::MovePtr<Allocation> m_allocation;
378 struct Buffer : public BufferOrImage
381 Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
382 : BufferOrImage (false)
383 , m_sizeInBytes (sizeInBytes)
386 const DeviceInterface& vkd = context.getDeviceInterface();
387 const VkDevice device = context.getDevice();
389 const vk::VkBufferCreateInfo bufferCreateInfo =
391 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
396 VK_SHARING_MODE_EXCLUSIVE,
400 m_buffer = createBuffer(vkd, device, &bufferCreateInfo);
402 VkMemoryRequirements req = getBufferMemoryRequirements(vkd, device, *m_buffer);
404 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
405 VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
408 virtual VkDescriptorType getType() const
410 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
412 return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
414 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
417 VkBuffer getBuffer () const
422 const VkBuffer* getBufferPtr () const
427 VkDeviceSize getSize () const
429 return m_sizeInBytes;
433 Move<VkBuffer> m_buffer;
434 VkDeviceSize m_sizeInBytes;
435 const VkBufferUsageFlags m_usage;
438 struct Image : public BufferOrImage
440 explicit Image(Context& context, deUint32 width, deUint32 height,
441 VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
442 : BufferOrImage(true)
444 const DeviceInterface& vk = context.getDeviceInterface();
445 const VkDevice device = context.getDevice();
446 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
448 const VkImageCreateInfo imageCreateInfo =
450 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
451 format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
452 VK_IMAGE_TILING_OPTIMAL, usage,
453 VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
454 VK_IMAGE_LAYOUT_UNDEFINED
457 const VkComponentMapping componentMapping =
459 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
460 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
463 const VkImageSubresourceRange subresourceRange =
465 VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
466 0u, //deUint32 baseMipLevel
467 1u, //deUint32 levelCount
468 0u, //deUint32 baseArrayLayer
469 1u //deUint32 layerCount
472 const VkSamplerCreateInfo samplerCreateInfo =
474 VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
479 VK_SAMPLER_MIPMAP_MODE_NEAREST,
480 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
481 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
482 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
487 VK_COMPARE_OP_ALWAYS,
490 VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
494 m_image = createImage(vk, device, &imageCreateInfo);
496 VkMemoryRequirements req = getImageMemoryRequirements(vk, device, *m_image);
499 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
501 VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
503 const VkImageViewCreateInfo imageViewCreateInfo =
505 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
506 VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
510 m_imageView = createImageView(vk, device, &imageViewCreateInfo);
511 m_sampler = createSampler(vk, device, &samplerCreateInfo);
513 // Transition input image layouts
515 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
516 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
518 beginCommandBuffer(vk, *cmdBuffer);
520 const VkImageMemoryBarrier imageBarrier = makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
521 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
523 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
524 (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);
526 endCommandBuffer(vk, *cmdBuffer);
527 submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
531 VkImage getImage () const
536 VkImageView getImageView () const
541 VkSampler getSampler () const
547 Move<VkImage> m_image;
548 Move<VkImageView> m_imageView;
549 Move<VkSampler> m_sampler;
553 std::string vkt::subgroups::getSharedMemoryBallotHelper()
555 return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
556 "uvec4 sharedMemoryBallot(bool vote)\n"
558 " uint groupOffset = gl_SubgroupID;\n"
559 " // One invocation in the group 0's the whole group's data\n"
560 " if (subgroupElect())\n"
562 " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
564 " subgroupMemoryBarrierShared();\n"
567 " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
568 " const highp uint bitToSet = 1u << invocationId;\n"
569 " switch (gl_SubgroupInvocationID / 32)\n"
571 " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
572 " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
573 " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
574 " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
577 " subgroupMemoryBarrierShared();\n"
578 " return superSecretComputeShaderHelper[groupOffset];\n"
582 std::string vkt::subgroups::getSharedMemoryBallotHelperARB()
584 return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
585 "uint64_t sharedMemoryBallot(bool vote)\n"
587 " uint groupOffset = gl_SubgroupID;\n"
588 " // One invocation in the group 0's the whole group's data\n"
589 " if (subgroupElect())\n"
591 " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
593 " subgroupMemoryBarrierShared();\n"
596 " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
597 " const highp uint bitToSet = 1u << invocationId;\n"
598 " switch (gl_SubgroupInvocationID / 32)\n"
600 " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
601 " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
602 " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
603 " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
606 " subgroupMemoryBarrierShared();\n"
607 " return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
611 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
613 VkPhysicalDeviceSubgroupProperties subgroupProperties;
614 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
615 subgroupProperties.pNext = DE_NULL;
617 VkPhysicalDeviceProperties2 properties;
618 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
619 properties.pNext = &subgroupProperties;
621 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
623 return subgroupProperties.subgroupSize;
626 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
630 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
635 DE_FATAL("Unhandled stage!");
637 case VK_SHADER_STAGE_COMPUTE_BIT:
639 case VK_SHADER_STAGE_FRAGMENT_BIT:
641 case VK_SHADER_STAGE_VERTEX_BIT:
643 case VK_SHADER_STAGE_GEOMETRY_BIT:
645 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
646 return "tess_control";
647 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
652 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
657 DE_FATAL("Unknown subgroup feature category!");
659 case VK_SUBGROUP_FEATURE_BASIC_BIT:
660 return "VK_SUBGROUP_FEATURE_BASIC_BIT";
661 case VK_SUBGROUP_FEATURE_VOTE_BIT:
662 return "VK_SUBGROUP_FEATURE_VOTE_BIT";
663 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
664 return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
665 case VK_SUBGROUP_FEATURE_BALLOT_BIT:
666 return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
667 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
668 return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
669 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
670 return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
671 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
672 return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
673 case VK_SUBGROUP_FEATURE_QUAD_BIT:
674 return "VK_SUBGROUP_FEATURE_QUAD_BIT";
678 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
685 " float pixelSize = 2.0f/1024.0f;\n"
686 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
687 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
688 " gl_PointSize = 1.0f;\n"
691 const std::string vertNoSubgroup =
694 "; Generator: Khronos Glslang Reference Front End; 1\n"
697 "OpCapability Shader\n"
698 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
699 "OpMemoryModel Logical GLSL450\n"
700 "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
701 "OpMemberDecorate %20 0 BuiltIn Position\n"
702 "OpMemberDecorate %20 1 BuiltIn PointSize\n"
703 "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
704 "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
705 "OpDecorate %20 Block\n"
706 "OpDecorate %26 BuiltIn VertexIndex\n"
708 "%3 = OpTypeFunction %2\n"
709 "%6 = OpTypeFloat 32\n"
710 "%7 = OpTypePointer Function %6\n"
711 "%9 = OpConstant %6 0.00195313\n"
712 "%12 = OpConstant %6 2\n"
713 "%14 = OpConstant %6 1\n"
714 "%16 = OpTypeVector %6 4\n"
715 "%17 = OpTypeInt 32 0\n"
716 "%18 = OpConstant %17 1\n"
717 "%19 = OpTypeArray %6 %18\n"
718 "%20 = OpTypeStruct %16 %6 %19 %19\n"
719 "%21 = OpTypePointer Output %20\n"
720 "%22 = OpVariable %21 Output\n"
721 "%23 = OpTypeInt 32 1\n"
722 "%24 = OpConstant %23 0\n"
723 "%25 = OpTypePointer Input %23\n"
724 "%26 = OpVariable %25 Input\n"
725 "%33 = OpConstant %6 0\n"
726 "%35 = OpTypePointer Output %16\n"
727 "%37 = OpConstant %23 1\n"
728 "%38 = OpTypePointer Output %6\n"
729 "%4 = OpFunction %2 None %3\n"
731 "%8 = OpVariable %7 Function\n"
732 "%10 = OpVariable %7 Function\n"
734 "%11 = OpLoad %6 %8\n"
735 "%13 = OpFDiv %6 %11 %12\n"
736 "%15 = OpFSub %6 %13 %14\n"
738 "%27 = OpLoad %23 %26\n"
739 "%28 = OpConvertSToF %6 %27\n"
740 "%29 = OpLoad %6 %8\n"
741 "%30 = OpFMul %6 %28 %29\n"
742 "%31 = OpLoad %6 %10\n"
743 "%32 = OpFAdd %6 %30 %31\n"
744 "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
745 "%36 = OpAccessChain %35 %22 %24\n"
747 "%39 = OpAccessChain %38 %22 %37\n"
751 programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
757 "layout(vertices=1) out;\n"
761 " if (gl_InvocationID == 0)\n"
763 " gl_TessLevelOuter[0] = 1.0f;\n"
764 " gl_TessLevelOuter[1] = 1.0f;\n"
766 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
769 const std::string tescNoSubgroup =
772 "; Generator: Khronos Glslang Reference Front End; 1\n"
775 "OpCapability Tessellation\n"
776 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
777 "OpMemoryModel Logical GLSL450\n"
778 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
779 "OpExecutionMode %4 OutputVertices 1\n"
780 "OpDecorate %8 BuiltIn InvocationId\n"
781 "OpDecorate %20 Patch\n"
782 "OpDecorate %20 BuiltIn TessLevelOuter\n"
783 "OpMemberDecorate %29 0 BuiltIn Position\n"
784 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
785 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
786 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
787 "OpDecorate %29 Block\n"
788 "OpMemberDecorate %34 0 BuiltIn Position\n"
789 "OpMemberDecorate %34 1 BuiltIn PointSize\n"
790 "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
791 "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
792 "OpDecorate %34 Block\n"
794 "%3 = OpTypeFunction %2\n"
795 "%6 = OpTypeInt 32 1\n"
796 "%7 = OpTypePointer Input %6\n"
797 "%8 = OpVariable %7 Input\n"
798 "%10 = OpConstant %6 0\n"
800 "%15 = OpTypeFloat 32\n"
801 "%16 = OpTypeInt 32 0\n"
802 "%17 = OpConstant %16 4\n"
803 "%18 = OpTypeArray %15 %17\n"
804 "%19 = OpTypePointer Output %18\n"
805 "%20 = OpVariable %19 Output\n"
806 "%21 = OpConstant %15 1\n"
807 "%22 = OpTypePointer Output %15\n"
808 "%24 = OpConstant %6 1\n"
809 "%26 = OpTypeVector %15 4\n"
810 "%27 = OpConstant %16 1\n"
811 "%28 = OpTypeArray %15 %27\n"
812 "%29 = OpTypeStruct %26 %15 %28 %28\n"
813 "%30 = OpTypeArray %29 %27\n"
814 "%31 = OpTypePointer Output %30\n"
815 "%32 = OpVariable %31 Output\n"
816 "%34 = OpTypeStruct %26 %15 %28 %28\n"
817 "%35 = OpConstant %16 32\n"
818 "%36 = OpTypeArray %34 %35\n"
819 "%37 = OpTypePointer Input %36\n"
820 "%38 = OpVariable %37 Input\n"
821 "%40 = OpTypePointer Input %26\n"
822 "%43 = OpTypePointer Output %26\n"
823 "%4 = OpFunction %2 None %3\n"
825 "%9 = OpLoad %6 %8\n"
826 "%12 = OpIEqual %11 %9 %10\n"
827 "OpSelectionMerge %14 None\n"
828 "OpBranchConditional %12 %13 %14\n"
830 "%23 = OpAccessChain %22 %20 %10\n"
832 "%25 = OpAccessChain %22 %20 %24\n"
836 "%33 = OpLoad %6 %8\n"
837 "%39 = OpLoad %6 %8\n"
838 "%41 = OpAccessChain %40 %38 %39 %10\n"
839 "%42 = OpLoad %26 %41\n"
840 "%44 = OpAccessChain %43 %32 %33 %10\n"
844 programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
850 "layout(isolines) in;\n"
854 " float pixelSize = 2.0f/1024.0f;\n"
855 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
858 const std::string teseNoSubgroup =
861 "; Generator: Khronos Glslang Reference Front End; 2\n"
864 "OpCapability Tessellation\n"
865 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
866 "OpMemoryModel Logical GLSL450\n"
867 "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
868 "OpExecutionMode %4 Isolines\n"
869 "OpExecutionMode %4 SpacingEqual\n"
870 "OpExecutionMode %4 VertexOrderCcw\n"
871 "OpMemberDecorate %14 0 BuiltIn Position\n"
872 "OpMemberDecorate %14 1 BuiltIn PointSize\n"
873 "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
874 "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
875 "OpDecorate %14 Block\n"
876 "OpMemberDecorate %19 0 BuiltIn Position\n"
877 "OpMemberDecorate %19 1 BuiltIn PointSize\n"
878 "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
879 "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
880 "OpDecorate %19 Block\n"
881 "OpDecorate %29 BuiltIn TessCoord\n"
883 "%3 = OpTypeFunction %2\n"
884 "%6 = OpTypeFloat 32\n"
885 "%7 = OpTypePointer Function %6\n"
886 "%9 = OpConstant %6 0.00195313\n"
887 "%10 = OpTypeVector %6 4\n"
888 "%11 = OpTypeInt 32 0\n"
889 "%12 = OpConstant %11 1\n"
890 "%13 = OpTypeArray %6 %12\n"
891 "%14 = OpTypeStruct %10 %6 %13 %13\n"
892 "%15 = OpTypePointer Output %14\n"
893 "%16 = OpVariable %15 Output\n"
894 "%17 = OpTypeInt 32 1\n"
895 "%18 = OpConstant %17 0\n"
896 "%19 = OpTypeStruct %10 %6 %13 %13\n"
897 "%20 = OpConstant %11 32\n"
898 "%21 = OpTypeArray %19 %20\n"
899 "%22 = OpTypePointer Input %21\n"
900 "%23 = OpVariable %22 Input\n"
901 "%24 = OpTypePointer Input %10\n"
902 "%27 = OpTypeVector %6 3\n"
903 "%28 = OpTypePointer Input %27\n"
904 "%29 = OpVariable %28 Input\n"
905 "%30 = OpConstant %11 0\n"
906 "%31 = OpTypePointer Input %6\n"
907 "%36 = OpConstant %6 2\n"
908 "%40 = OpTypePointer Output %10\n"
909 "%4 = OpFunction %2 None %3\n"
911 "%8 = OpVariable %7 Function\n"
913 "%25 = OpAccessChain %24 %23 %18 %18\n"
914 "%26 = OpLoad %10 %25\n"
915 "%32 = OpAccessChain %31 %29 %30\n"
916 "%33 = OpLoad %6 %32\n"
917 "%34 = OpLoad %6 %8\n"
918 "%35 = OpFMul %6 %33 %34\n"
919 "%37 = OpFDiv %6 %35 %36\n"
920 "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
921 "%39 = OpFAdd %10 %26 %38\n"
922 "%41 = OpAccessChain %40 %16 %18\n"
926 programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
932 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
937 DE_FATAL("Unhandled stage!");
939 case VK_SHADER_STAGE_FRAGMENT_BIT:
944 " float pixelSize = 2.0f/1024.0f;\n"
945 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
946 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
948 case VK_SHADER_STAGE_GEOMETRY_BIT:
954 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
955 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
964 void vkt::subgroups::initStdFrameBufferPrograms( SourceCollections& programCollection,
965 const vk::ShaderBuildOptions& buildOptions,
966 VkShaderStageFlags shaderStage,
969 std::string extHeader,
971 std::string helperStr)
973 subgroups::setFragmentShaderFrameBuffer(programCollection);
975 if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
976 subgroups::setVertexShaderFrameBuffer(programCollection);
978 if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
980 std::ostringstream vertex;
981 vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
983 << "layout(location = 0) in highp vec4 in_position;\n"
984 << "layout(location = 0) out float result;\n"
985 << "layout(set = 0, binding = 0) uniform Buffer1\n"
987 << " " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
991 << "void main (void)\n"
993 << " uint tempRes;\n"
995 << " result = float(tempRes);\n"
996 << " gl_Position = in_position;\n"
997 << " gl_PointSize = 1.0f;\n"
999 programCollection.glslSources.add("vert")
1000 << glu::VertexSource(vertex.str()) << buildOptions;
1002 else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1004 std::ostringstream geometry;
1006 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1007 << extHeader.c_str()
1008 << "layout(points) in;\n"
1009 << "layout(points, max_vertices = 1) out;\n"
1010 << "layout(location = 0) out float out_color;\n"
1011 << "layout(set = 0, binding = 0) uniform Buffer1\n"
1013 << " " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1016 << helperStr.c_str()
1017 << "void main (void)\n"
1019 << " uint tempRes;\n"
1021 << " out_color = float(tempRes);\n"
1022 << " gl_Position = gl_in[0].gl_Position;\n"
1023 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1024 << " EmitVertex();\n"
1025 << " EndPrimitive();\n"
1028 programCollection.glslSources.add("geometry")
1029 << glu::GeometrySource(geometry.str()) << buildOptions;
1031 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1033 std::ostringstream controlSource;
1034 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1035 << extHeader.c_str()
1036 << "layout(vertices = 2) out;\n"
1037 << "layout(location = 0) out float out_color[];\n"
1038 << "layout(set = 0, binding = 0) uniform Buffer1\n"
1040 << " " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1043 << helperStr.c_str()
1044 << "void main (void)\n"
1046 << " if (gl_InvocationID == 0)\n"
1048 << " gl_TessLevelOuter[0] = 1.0f;\n"
1049 << " gl_TessLevelOuter[1] = 1.0f;\n"
1051 << " uint tempRes;\n"
1053 << " out_color[gl_InvocationID] = float(tempRes);\n"
1054 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1057 programCollection.glslSources.add("tesc")
1058 << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1059 subgroups::setTesEvalShaderFrameBuffer(programCollection);
1061 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1063 ostringstream evaluationSource;
1064 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1065 << extHeader.c_str()
1066 << "layout(isolines, equal_spacing, ccw ) in;\n"
1067 << "layout(location = 0) out float out_color;\n"
1068 << "layout(set = 0, binding = 0) uniform Buffer1\n"
1070 << " " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1073 << helperStr.c_str()
1074 << "void main (void)\n"
1076 << " uint tempRes;\n"
1078 << " out_color = float(tempRes);\n"
1079 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1082 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1083 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1087 DE_FATAL("Unsupported shader stage");
// Builds the GLSL shader sources used by the "standard" (non-framebuffer) subgroup
// tests. For compute, a single compute shader writes one result per global
// invocation; for graphics, one shader per stage writes its result to a dedicated
// SSBO binding (vertex->0, tesc->1, tese->2, geometry->3) while the shared input
// `data[]` lives at binding 4. `extHeader`/`testSrc`/`helperStr` are spliced into
// each stage's source so callers can inject the per-test subgroup operation.
// NOTE(review): this listing appears sampled — some source lines (braces, parts of
// the concatenated GLSL strings) are not visible here; comments describe only what
// is shown.
1091 void vkt::subgroups::initStdPrograms( vk::SourceCollections& programCollection,
1092 const vk::ShaderBuildOptions& buildOptions,
1093 vk::VkShaderStageFlags shaderStage,
1094 vk::VkFormat format,
1095 std::string extHeader,
1096 std::string testSrc,
1097 std::string helperStr)
// Compute path: one SPIR-V module, workgroup size supplied via spec constants.
1099 if (shaderStage == VK_SHADER_STAGE_COMPUTE_BIT)
1101 std::ostringstream src;
1103 src << "#version 450\n"
1104 << extHeader.c_str()
// Workgroup dimensions come from specialization constants 0/1/2 so each test
// run can vary local size without recompiling the shader.
1105 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1106 "local_size_z_id = 2) in;\n"
1107 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1109 << " uint result[];\n"
1111 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
1113 << " " << subgroups::getFormatNameForGLSL(format) << " data[];\n"
1116 << helperStr.c_str()
1117 << "void main (void)\n"
// Flatten the 3D invocation id into a linear index for result[].
1119 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1120 << " highp uint offset = globalSize.x * ((globalSize.y * "
1121 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1122 "gl_GlobalInvocationID.x;\n"
// tempRes is assigned by the injected testSrc (not visible in this listing).
1123 << " uint tempRes;\n"
1125 << " result[offset] = tempRes;\n"
1128 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
// Graphics path: build all five stages; every stage reads data[] from binding 4.
1132 const string vertex =
1135 "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1139 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1141 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1145 "void main (void)\n"
1149 " result[gl_VertexIndex] = tempRes;\n"
// Position each vertex as one point per pixel across a 1024-wide target.
1150 " float pixelSize = 2.0f/1024.0f;\n"
1151 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1152 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1153 " gl_PointSize = 1.0f;\n"
// Tessellation control: one result per patch (gl_PrimitiveID), binding 1.
1159 "layout(vertices=1) out;\n"
1160 "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
1164 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1166 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1170 "void main (void)\n"
1174 " result[gl_PrimitiveID] = tempRes;\n"
1175 " if (gl_InvocationID == 0)\n"
1177 " gl_TessLevelOuter[0] = 1.0f;\n"
1178 " gl_TessLevelOuter[1] = 1.0f;\n"
1180 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
// Tessellation evaluation: isolines produce two invocations per patch, hence
// the *2 + tess-coord index into result[] (binding 2).
1186 "layout(isolines) in;\n"
1187 "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
1191 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1193 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1197 "void main (void)\n"
1201 " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1202 " float pixelSize = 2.0f/1024.0f;\n"
1203 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
// Geometry: input topology is a template parameter, specialized later by
// addGeometryShadersFromTemplate into "lines" and "points" variants (binding 3).
1206 const string geometry =
1209 "layout(${TOPOLOGY}) in;\n"
1210 "layout(points, max_vertices = 1) out;\n"
1211 "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
1215 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1217 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1221 "void main (void)\n"
1225 " result[gl_PrimitiveIDIn] = tempRes;\n"
1226 " gl_Position = gl_in[0].gl_Position;\n"
1228 " EndPrimitive();\n"
// Fragment: writes its result to a color attachment instead of an SSBO.
1231 const string fragment =
1234 "layout(location = 0) out uint result;\n"
1235 "layout(set = 0, binding = 4, std430) readonly buffer Buffer1\n"
1237 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1240 "void main (void)\n"
1244 " result = tempRes;\n"
// Register the fallback (no-subgroup) shader plus all stage sources.
1247 subgroups::addNoSubgroupShader(programCollection);
1249 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1250 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1251 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1252 subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1253 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
// Subgroup operations were introduced in Vulkan 1.1, so support is equivalent
// to the context supporting API version 1.1.
1257 bool vkt::subgroups::isSubgroupSupported(Context& context)
1259 return context.contextSupports(vk::ApiVersion(1, 1, 0));
// Returns true if the device advertises subgroup-operation support for any of
// the shader stages in `stage`, by querying VkPhysicalDeviceSubgroupProperties
// through the vkGetPhysicalDeviceProperties2 pNext chain.
1262 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
1263 Context& context, const VkShaderStageFlags stage)
1265 VkPhysicalDeviceSubgroupProperties subgroupProperties;
1266 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1267 subgroupProperties.pNext = DE_NULL;
1269 VkPhysicalDeviceProperties2 properties;
1270 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1271 properties.pNext = &subgroupProperties;
1273 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
// Bitwise test: true if any requested stage bit is in supportedStages.
1275 return (stage & subgroupProperties.supportedStages) ? true : false;
// Returns whether the spec *requires* subgroup-operation support for `stage`.
// Only the compute case is visible in this listing; the return statements for
// each branch have been sampled out — see the original file for the full switch.
1278 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
1279 VkShaderStageFlags stage)
// Compute support is mandatory in Vulkan 1.1 (VkPhysicalDeviceSubgroupProperties
// guarantees VK_SHADER_STAGE_COMPUTE_BIT in supportedStages).
1285 case VK_SHADER_STAGE_COMPUTE_BIT:
// Returns true if the device supports the requested subgroup feature flag
// (e.g. VK_SUBGROUP_FEATURE_BALLOT_BIT), read from
// VkPhysicalDeviceSubgroupProperties::supportedOperations.
1290 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
1292 VkSubgroupFeatureFlagBits bit) {
1293 VkPhysicalDeviceSubgroupProperties subgroupProperties;
1294 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1295 subgroupProperties.pNext = DE_NULL;
1297 VkPhysicalDeviceProperties2 properties;
1298 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1299 properties.pNext = &subgroupProperties;
1301 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1303 return (bit & subgroupProperties.supportedOperations) ? true : false;
// Fragment-stage SSBO writes require the fragmentStoresAndAtomics device feature.
1306 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
1308 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1309 context.getInstanceInterface(), context.getPhysicalDevice());
1310 return features.fragmentStoresAndAtomics ? true : false;
// Vertex-pipeline SSBO writes (vertex/tessellation/geometry stages) require the
// vertexPipelineStoresAndAtomics device feature.
1313 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
1315 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1316 context.getInstanceInterface(), context.getPhysicalDevice());
1317 return features.vertexPipelineStoresAndAtomics ? true : false;
// 64-bit integer shader types require the shaderInt64 device feature.
1320 bool vkt::subgroups::isInt64SupportedForDevice(Context& context)
1322 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1323 context.getInstanceInterface(), context.getPhysicalDevice());
1324 return features.shaderInt64 ? true : false;
// Writing gl_PointSize from tessellation/geometry stages requires the
// shaderTessellationAndGeometryPointSize device feature.
1327 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1329 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1330 context.getInstanceInterface(), context.getPhysicalDevice());
1331 return features.shaderTessellationAndGeometryPointSize ? true : false;
// Returns true if `format` can be used in subgroup tests on this device.
// Non-32-bit formats need extended-type support: 16-bit float needs
// shaderSubgroupExtendedTypes + shaderFloat16 + storageBuffer16BitAccess,
// 8-bit int needs shaderInt8, 16-bit int needs shaderInt16 + 16-bit storage,
// 64-bit needs shaderInt64/shaderFloat64. Feature structs are zero-initialized
// so that when the extensions are absent the chained queries stay false and the
// corresponding formats report unsupported. (The `switch (format)` header and
// some case bodies are sampled out of this listing.)
1336 VkPhysicalDeviceShaderSubgroupExtendedTypesFeaturesKHR subgroupExtendedTypesFeatures;
1337 deMemset(&subgroupExtendedTypesFeatures, 0, sizeof(subgroupExtendedTypesFeatures));
1338 subgroupExtendedTypesFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES_KHR;
1339 subgroupExtendedTypesFeatures.pNext = DE_NULL;
1341 VkPhysicalDeviceShaderFloat16Int8FeaturesKHR float16Int8Features;
1342 deMemset(&float16Int8Features, 0, sizeof(float16Int8Features));
1343 float16Int8Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR;
1344 float16Int8Features.pNext = DE_NULL;
1346 VkPhysicalDeviceFeatures2 features2;
1347 deMemset(&features2, 0, sizeof(features2));
1348 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1349 features2.pNext = DE_NULL;
1351 VkPhysicalDevice16BitStorageFeatures storage16bit;
1352 deMemset(&storage16bit, 0, sizeof(storage16bit));
1353 storage16bit.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
1354 storage16bit.pNext = DE_NULL;
1355 bool is16bitStorageSupported = context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage");
// Only chain the feature structs when the extensions exist; querying an
// unsupported struct through pNext is invalid usage.
1357 if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1358 context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1360 features2.pNext = &subgroupExtendedTypesFeatures;
1361 subgroupExtendedTypesFeatures.pNext = &float16Int8Features;
1362 if ( is16bitStorageSupported )
1364 float16Int8Features.pNext = &storage16bit;
1369 const PlatformInterface& platformInterface = context.getPlatformInterface();
1370 const VkInstance instance = context.getInstance();
1371 const InstanceDriver instanceDriver (platformInterface, instance);
1373 instanceDriver.getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
// Dispatch on format; bitwise '&' on VkBool32 values works because the spec
// defines supported features as exactly 0 or 1.
1379 case VK_FORMAT_R16_SFLOAT:
1380 case VK_FORMAT_R16G16_SFLOAT:
1381 case VK_FORMAT_R16G16B16_SFLOAT:
1382 case VK_FORMAT_R16G16B16A16_SFLOAT:
1383 return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderFloat16 & storage16bit.storageBuffer16BitAccess ? true : false;
1384 case VK_FORMAT_R64_SFLOAT:
1385 case VK_FORMAT_R64G64_SFLOAT:
1386 case VK_FORMAT_R64G64B64_SFLOAT:
1387 case VK_FORMAT_R64G64B64A64_SFLOAT:
1388 return features2.features.shaderFloat64 ? true : false;
1389 case VK_FORMAT_R8_SINT:
1390 case VK_FORMAT_R8G8_SINT:
1391 case VK_FORMAT_R8G8B8_SINT:
1392 case VK_FORMAT_R8G8B8A8_SINT:
1393 case VK_FORMAT_R8_UINT:
1394 case VK_FORMAT_R8G8_UINT:
1395 case VK_FORMAT_R8G8B8_UINT:
1396 case VK_FORMAT_R8G8B8A8_UINT:
1397 return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderInt8 ? true : false;
1398 case VK_FORMAT_R16_SINT:
1399 case VK_FORMAT_R16G16_SINT:
1400 case VK_FORMAT_R16G16B16_SINT:
1401 case VK_FORMAT_R16G16B16A16_SINT:
1402 case VK_FORMAT_R16_UINT:
1403 case VK_FORMAT_R16G16_UINT:
1404 case VK_FORMAT_R16G16B16_UINT:
1405 case VK_FORMAT_R16G16B16A16_UINT:
1406 return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt16 & storage16bit.storageBuffer16BitAccess ? true : false;
1407 case VK_FORMAT_R64_SINT:
1408 case VK_FORMAT_R64G64_SINT:
1409 case VK_FORMAT_R64G64B64_SINT:
1410 case VK_FORMAT_R64G64B64A64_SINT:
1411 case VK_FORMAT_R64_UINT:
1412 case VK_FORMAT_R64G64_UINT:
1413 case VK_FORMAT_R64G64B64_UINT:
1414 case VK_FORMAT_R64G64B64A64_UINT:
1415 return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt64 ? true : false;
// Maps a VkFormat to its GLSL type name (e.g. R32G32B32A32_SFLOAT -> "vec4",
// R8_SINT -> "int8_t"). The return statements for each case have been sampled
// out of this listing; only the case labels are visible. Unhandled formats hit
// DE_FATAL.
1419 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1424 DE_FATAL("Unhandled format!");
1426 case VK_FORMAT_R8_SINT:
1428 case VK_FORMAT_R8G8_SINT:
1430 case VK_FORMAT_R8G8B8_SINT:
1432 case VK_FORMAT_R8G8B8A8_SINT:
1434 case VK_FORMAT_R8_UINT:
1436 case VK_FORMAT_R8G8_UINT:
1438 case VK_FORMAT_R8G8B8_UINT:
1440 case VK_FORMAT_R8G8B8A8_UINT:
1442 case VK_FORMAT_R16_SINT:
1444 case VK_FORMAT_R16G16_SINT:
1446 case VK_FORMAT_R16G16B16_SINT:
1448 case VK_FORMAT_R16G16B16A16_SINT:
1450 case VK_FORMAT_R16_UINT:
1452 case VK_FORMAT_R16G16_UINT:
1454 case VK_FORMAT_R16G16B16_UINT:
1456 case VK_FORMAT_R16G16B16A16_UINT:
1458 case VK_FORMAT_R32_SINT:
1460 case VK_FORMAT_R32G32_SINT:
1462 case VK_FORMAT_R32G32B32_SINT:
1464 case VK_FORMAT_R32G32B32A32_SINT:
1466 case VK_FORMAT_R32_UINT:
1468 case VK_FORMAT_R32G32_UINT:
1470 case VK_FORMAT_R32G32B32_UINT:
1472 case VK_FORMAT_R32G32B32A32_UINT:
1474 case VK_FORMAT_R64_SINT:
1476 case VK_FORMAT_R64G64_SINT:
1478 case VK_FORMAT_R64G64B64_SINT:
1480 case VK_FORMAT_R64G64B64A64_SINT:
1482 case VK_FORMAT_R64_UINT:
1484 case VK_FORMAT_R64G64_UINT:
1486 case VK_FORMAT_R64G64B64_UINT:
1488 case VK_FORMAT_R64G64B64A64_UINT:
1490 case VK_FORMAT_R16_SFLOAT:
1492 case VK_FORMAT_R16G16_SFLOAT:
1494 case VK_FORMAT_R16G16B16_SFLOAT:
1496 case VK_FORMAT_R16G16B16A16_SFLOAT:
1498 case VK_FORMAT_R32_SFLOAT:
1500 case VK_FORMAT_R32G32_SFLOAT:
1502 case VK_FORMAT_R32G32B32_SFLOAT:
1504 case VK_FORMAT_R32G32B32A32_SFLOAT:
1506 case VK_FORMAT_R64_SFLOAT:
1508 case VK_FORMAT_R64G64_SFLOAT:
1510 case VK_FORMAT_R64G64B64_SFLOAT:
1512 case VK_FORMAT_R64G64B64A64_SFLOAT:
// USCALED variants are used by the tests as boolean-like formats — TODO confirm
// against the sampled-out return values.
1514 case VK_FORMAT_R8_USCALED:
1516 case VK_FORMAT_R8G8_USCALED:
1518 case VK_FORMAT_R8G8B8_USCALED:
1520 case VK_FORMAT_R8G8B8A8_USCALED:
// Returns the GLSL "#extension" line a shader must include to use `format`'s
// type: int8/int16/int64/float16 extended-type extensions. 32-bit formats (and
// the default case, sampled out of this listing) need no extra extension.
1525 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1531 case VK_FORMAT_R8_SINT:
1532 case VK_FORMAT_R8G8_SINT:
1533 case VK_FORMAT_R8G8B8_SINT:
1534 case VK_FORMAT_R8G8B8A8_SINT:
1535 case VK_FORMAT_R8_UINT:
1536 case VK_FORMAT_R8G8_UINT:
1537 case VK_FORMAT_R8G8B8_UINT:
1538 case VK_FORMAT_R8G8B8A8_UINT:
1539 return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1540 case VK_FORMAT_R16_SINT:
1541 case VK_FORMAT_R16G16_SINT:
1542 case VK_FORMAT_R16G16B16_SINT:
1543 case VK_FORMAT_R16G16B16A16_SINT:
1544 case VK_FORMAT_R16_UINT:
1545 case VK_FORMAT_R16G16_UINT:
1546 case VK_FORMAT_R16G16B16_UINT:
1547 case VK_FORMAT_R16G16B16A16_UINT:
1548 return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1549 case VK_FORMAT_R64_SINT:
1550 case VK_FORMAT_R64G64_SINT:
1551 case VK_FORMAT_R64G64B64_SINT:
1552 case VK_FORMAT_R64G64B64A64_SINT:
1553 case VK_FORMAT_R64_UINT:
1554 case VK_FORMAT_R64G64_UINT:
1555 case VK_FORMAT_R64G64B64_UINT:
1556 case VK_FORMAT_R64G64B64A64_UINT:
1557 return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1558 case VK_FORMAT_R16_SFLOAT:
1559 case VK_FORMAT_R16G16_SFLOAT:
1560 case VK_FORMAT_R16G16B16_SFLOAT:
1561 case VK_FORMAT_R16G16B16A16_SFLOAT:
1562 return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
// Returns the full list of VkFormats the subgroup tests iterate over:
// 8/16/32/64-bit signed and unsigned integers, 16/32/64-bit floats, and the
// R8*_USCALED variants. (The trailing `return formats;` is sampled out of this
// listing.)
1566 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats()
1568 std::vector<VkFormat> formats;
1570 formats.push_back(VK_FORMAT_R8_SINT);
1571 formats.push_back(VK_FORMAT_R8G8_SINT);
1572 formats.push_back(VK_FORMAT_R8G8B8_SINT);
1573 formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1574 formats.push_back(VK_FORMAT_R8_UINT);
1575 formats.push_back(VK_FORMAT_R8G8_UINT);
1576 formats.push_back(VK_FORMAT_R8G8B8_UINT);
1577 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1578 formats.push_back(VK_FORMAT_R16_SINT);
1579 formats.push_back(VK_FORMAT_R16G16_SINT);
1580 formats.push_back(VK_FORMAT_R16G16B16_SINT);
1581 formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1582 formats.push_back(VK_FORMAT_R16_UINT);
1583 formats.push_back(VK_FORMAT_R16G16_UINT);
1584 formats.push_back(VK_FORMAT_R16G16B16_UINT);
1585 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1586 formats.push_back(VK_FORMAT_R32_SINT);
1587 formats.push_back(VK_FORMAT_R32G32_SINT);
1588 formats.push_back(VK_FORMAT_R32G32B32_SINT);
1589 formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
1590 formats.push_back(VK_FORMAT_R32_UINT);
1591 formats.push_back(VK_FORMAT_R32G32_UINT);
1592 formats.push_back(VK_FORMAT_R32G32B32_UINT);
1593 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
1594 formats.push_back(VK_FORMAT_R64_SINT);
1595 formats.push_back(VK_FORMAT_R64G64_SINT);
1596 formats.push_back(VK_FORMAT_R64G64B64_SINT);
1597 formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
1598 formats.push_back(VK_FORMAT_R64_UINT);
1599 formats.push_back(VK_FORMAT_R64G64_UINT);
1600 formats.push_back(VK_FORMAT_R64G64B64_UINT);
1601 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
1602 formats.push_back(VK_FORMAT_R16_SFLOAT);
1603 formats.push_back(VK_FORMAT_R16G16_SFLOAT);
1604 formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
1605 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
1606 formats.push_back(VK_FORMAT_R32_SFLOAT);
1607 formats.push_back(VK_FORMAT_R32G32_SFLOAT);
1608 formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
1609 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
1610 formats.push_back(VK_FORMAT_R64_SFLOAT);
1611 formats.push_back(VK_FORMAT_R64G64_SFLOAT);
1612 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
1613 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
1614 formats.push_back(VK_FORMAT_R8_USCALED);
1615 formats.push_back(VK_FORMAT_R8G8_USCALED);
1616 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
1617 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
// True for all *_SINT formats. (The `switch` header, `return true;` and the
// default `return false;` are sampled out of this listing.)
1622 bool vkt::subgroups::isFormatSigned (VkFormat format)
1628 case VK_FORMAT_R8_SINT:
1629 case VK_FORMAT_R8G8_SINT:
1630 case VK_FORMAT_R8G8B8_SINT:
1631 case VK_FORMAT_R8G8B8A8_SINT:
1632 case VK_FORMAT_R16_SINT:
1633 case VK_FORMAT_R16G16_SINT:
1634 case VK_FORMAT_R16G16B16_SINT:
1635 case VK_FORMAT_R16G16B16A16_SINT:
1636 case VK_FORMAT_R32_SINT:
1637 case VK_FORMAT_R32G32_SINT:
1638 case VK_FORMAT_R32G32B32_SINT:
1639 case VK_FORMAT_R32G32B32A32_SINT:
1640 case VK_FORMAT_R64_SINT:
1641 case VK_FORMAT_R64G64_SINT:
1642 case VK_FORMAT_R64G64B64_SINT:
1643 case VK_FORMAT_R64G64B64A64_SINT:
// True for all *_UINT formats. (The `switch` header and return statements are
// sampled out of this listing.)
1648 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
1654 case VK_FORMAT_R8_UINT:
1655 case VK_FORMAT_R8G8_UINT:
1656 case VK_FORMAT_R8G8B8_UINT:
1657 case VK_FORMAT_R8G8B8A8_UINT:
1658 case VK_FORMAT_R16_UINT:
1659 case VK_FORMAT_R16G16_UINT:
1660 case VK_FORMAT_R16G16B16_UINT:
1661 case VK_FORMAT_R16G16B16A16_UINT:
1662 case VK_FORMAT_R32_UINT:
1663 case VK_FORMAT_R32G32_UINT:
1664 case VK_FORMAT_R32G32B32_UINT:
1665 case VK_FORMAT_R32G32B32A32_UINT:
1666 case VK_FORMAT_R64_UINT:
1667 case VK_FORMAT_R64G64_UINT:
1668 case VK_FORMAT_R64G64B64_UINT:
1669 case VK_FORMAT_R64G64B64A64_UINT:
// True for all *_SFLOAT formats. (The `switch` header and return statements are
// sampled out of this listing.)
1674 bool vkt::subgroups::isFormatFloat (VkFormat format)
1680 case VK_FORMAT_R16_SFLOAT:
1681 case VK_FORMAT_R16G16_SFLOAT:
1682 case VK_FORMAT_R16G16B16_SFLOAT:
1683 case VK_FORMAT_R16G16B16A16_SFLOAT:
1684 case VK_FORMAT_R32_SFLOAT:
1685 case VK_FORMAT_R32G32_SFLOAT:
1686 case VK_FORMAT_R32G32B32_SFLOAT:
1687 case VK_FORMAT_R32G32B32A32_SFLOAT:
1688 case VK_FORMAT_R64_SFLOAT:
1689 case VK_FORMAT_R64G64_SFLOAT:
1690 case VK_FORMAT_R64G64B64_SFLOAT:
1691 case VK_FORMAT_R64G64B64A64_SFLOAT:
// Registers the pass-through vertex shader used by framebuffer tests.
// The GLSL shown in the comment-strings is equivalent reference source; the
// shader is actually registered as pre-assembled SPIR-V so it needs no GLSL
// compilation step. It copies in_position to gl_Position and sets
// gl_PointSize = 1.0.
1696 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
1699 "layout(location = 0) in highp vec4 in_position;\n"
1700 "void main (void)\n"
1702 " gl_Position = in_position;\n"
1703 " gl_PointSize = 1.0f;\n"
// Hand-written SPIR-V equivalent of the GLSL above.
1706 programCollection.spirvAsmSources.add("vert") <<
1709 "; Generator: Khronos Glslang Reference Front End; 7\n"
1712 "OpCapability Shader\n"
1713 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1714 "OpMemoryModel Logical GLSL450\n"
1715 "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
// Decorations: gl_PerVertex block (%11) and the location-0 input (%17).
1716 "OpMemberDecorate %11 0 BuiltIn Position\n"
1717 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1718 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1719 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1720 "OpDecorate %11 Block\n"
1721 "OpDecorate %17 Location 0\n"
// Types, constants and module-scope variables.
1723 "%3 = OpTypeFunction %2\n"
1724 "%6 = OpTypeFloat 32\n"
1725 "%7 = OpTypeVector %6 4\n"
1726 "%8 = OpTypeInt 32 0\n"
1727 "%9 = OpConstant %8 1\n"
1728 "%10 = OpTypeArray %6 %9\n"
1729 "%11 = OpTypeStruct %7 %6 %10 %10\n"
1730 "%12 = OpTypePointer Output %11\n"
1731 "%13 = OpVariable %12 Output\n"
1732 "%14 = OpTypeInt 32 1\n"
1733 "%15 = OpConstant %14 0\n"
1734 "%16 = OpTypePointer Input %7\n"
1735 "%17 = OpVariable %16 Input\n"
1736 "%19 = OpTypePointer Output %7\n"
1737 "%21 = OpConstant %14 1\n"
1738 "%22 = OpConstant %6 1\n"
1739 "%23 = OpTypePointer Output %6\n"
// main(): load input position, store to gl_Position, store 1.0 to gl_PointSize.
1740 "%4 = OpFunction %2 None %3\n"
1742 "%18 = OpLoad %7 %17\n"
1743 "%20 = OpAccessChain %19 %13 %15\n"
1745 "%24 = OpAccessChain %23 %13 %21\n"
// Registers the standard framebuffer fragment shader (pre-assembled SPIR-V):
// converts the interpolated float in_color to uint and writes it to the
// location-0 color output.
1751 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
1754 "layout(location = 0) in float in_color;\n"
1755 "layout(location = 0) out uint out_color;\n"
1758 " out_color = uint(in_color);\n"
// Hand-written SPIR-V equivalent of the GLSL above.
1761 programCollection.spirvAsmSources.add("fragment") <<
1764 "; Generator: Khronos Glslang Reference Front End; 2\n"
1767 "OpCapability Shader\n"
1768 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1769 "OpMemoryModel Logical GLSL450\n"
1770 "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
1771 "OpExecutionMode %4 OriginUpperLeft\n"
1772 "OpDecorate %8 Location 0\n"
1773 "OpDecorate %11 Location 0\n"
1775 "%3 = OpTypeFunction %2\n"
1776 "%6 = OpTypeInt 32 0\n"
1777 "%7 = OpTypePointer Output %6\n"
1778 "%8 = OpVariable %7 Output\n"
1779 "%9 = OpTypeFloat 32\n"
1780 "%10 = OpTypePointer Input %9\n"
1781 "%11 = OpVariable %10 Input\n"
// main(): load float input, OpConvertFToU, store to the uint output.
1782 "%4 = OpFunction %2 None %3\n"
1784 "%12 = OpLoad %9 %11\n"
1785 "%13 = OpConvertFToU %6 %12\n"
// Registers the pass-through tessellation control shader (pre-assembled SPIR-V)
// used when the test itself runs in the evaluation stage: 2 output vertices,
// outer tess levels set to 1.0 by invocation 0, positions copied through.
1791 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
1794 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1795 "#extension GL_EXT_tessellation_shader : require\n"
1796 "layout(vertices = 2) out;\n"
1797 "void main (void)\n"
1799 " if (gl_InvocationID == 0)\n"
1801 " gl_TessLevelOuter[0] = 1.0f;\n"
1802 " gl_TessLevelOuter[1] = 1.0f;\n"
1804 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
// Hand-written SPIR-V equivalent of the GLSL above.
1807 programCollection.spirvAsmSources.add("tesc") <<
1810 "; Generator: Khronos Glslang Reference Front End; 2\n"
1813 "OpCapability Tessellation\n"
1814 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1815 "OpMemoryModel Logical GLSL450\n"
1816 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
1817 "OpExecutionMode %4 OutputVertices 2\n"
// Decorations: invocation id, patch tess levels, gl_out/gl_in blocks.
1818 "OpDecorate %8 BuiltIn InvocationId\n"
1819 "OpDecorate %20 Patch\n"
1820 "OpDecorate %20 BuiltIn TessLevelOuter\n"
1821 "OpMemberDecorate %29 0 BuiltIn Position\n"
1822 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1823 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1824 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1825 "OpDecorate %29 Block\n"
1826 "OpMemberDecorate %35 0 BuiltIn Position\n"
1827 "OpMemberDecorate %35 1 BuiltIn PointSize\n"
1828 "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
1829 "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
1830 "OpDecorate %35 Block\n"
// Types, constants and interface variables.
1832 "%3 = OpTypeFunction %2\n"
1833 "%6 = OpTypeInt 32 1\n"
1834 "%7 = OpTypePointer Input %6\n"
1835 "%8 = OpVariable %7 Input\n"
1836 "%10 = OpConstant %6 0\n"
1837 "%11 = OpTypeBool\n"
1838 "%15 = OpTypeFloat 32\n"
1839 "%16 = OpTypeInt 32 0\n"
1840 "%17 = OpConstant %16 4\n"
1841 "%18 = OpTypeArray %15 %17\n"
1842 "%19 = OpTypePointer Output %18\n"
1843 "%20 = OpVariable %19 Output\n"
1844 "%21 = OpConstant %15 1\n"
1845 "%22 = OpTypePointer Output %15\n"
1846 "%24 = OpConstant %6 1\n"
1847 "%26 = OpTypeVector %15 4\n"
1848 "%27 = OpConstant %16 1\n"
1849 "%28 = OpTypeArray %15 %27\n"
1850 "%29 = OpTypeStruct %26 %15 %28 %28\n"
1851 "%30 = OpConstant %16 2\n"
1852 "%31 = OpTypeArray %29 %30\n"
1853 "%32 = OpTypePointer Output %31\n"
1854 "%33 = OpVariable %32 Output\n"
1855 "%35 = OpTypeStruct %26 %15 %28 %28\n"
1856 "%36 = OpConstant %16 32\n"
1857 "%37 = OpTypeArray %35 %36\n"
1858 "%38 = OpTypePointer Input %37\n"
1859 "%39 = OpVariable %38 Input\n"
1860 "%41 = OpTypePointer Input %26\n"
1861 "%44 = OpTypePointer Output %26\n"
// main(): conditional tess-level writes for invocation 0, then position copy.
1862 "%4 = OpFunction %2 None %3\n"
1864 "%9 = OpLoad %6 %8\n"
1865 "%12 = OpIEqual %11 %9 %10\n"
1866 "OpSelectionMerge %14 None\n"
1867 "OpBranchConditional %12 %13 %14\n"
1869 "%23 = OpAccessChain %22 %20 %10\n"
1871 "%25 = OpAccessChain %22 %20 %24\n"
1875 "%34 = OpLoad %6 %8\n"
1876 "%40 = OpLoad %6 %8\n"
1877 "%42 = OpAccessChain %41 %39 %40 %10\n"
1878 "%43 = OpLoad %26 %42\n"
1879 "%45 = OpAccessChain %44 %33 %34 %10\n"
// Registers the pass-through tessellation evaluation shader (pre-assembled
// SPIR-V) used when the test itself runs in the control stage: interpolates
// position along the isoline and forwards in_color[0] to out_color.
1885 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
1888 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1889 "#extension GL_EXT_tessellation_shader : require\n"
1890 "layout(isolines, equal_spacing, ccw ) in;\n"
1891 "layout(location = 0) in float in_color[];\n"
1892 "layout(location = 0) out float out_color;\n"
1894 "void main (void)\n"
1896 " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1897 " out_color = in_color[0];\n"
// Hand-written SPIR-V equivalent of the GLSL above.
1900 programCollection.spirvAsmSources.add("tese") <<
1903 "; Generator: Khronos Glslang Reference Front End; 2\n"
1906 "OpCapability Tessellation\n"
1907 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1908 "OpMemoryModel Logical GLSL450\n"
1909 "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
1910 "OpExecutionMode %4 Isolines\n"
1911 "OpExecutionMode %4 SpacingEqual\n"
1912 "OpExecutionMode %4 VertexOrderCcw\n"
// Decorations: gl_PerVertex output/input blocks, tess coord, color in/out.
1913 "OpMemberDecorate %11 0 BuiltIn Position\n"
1914 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1915 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1916 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1917 "OpDecorate %11 Block\n"
1918 "OpMemberDecorate %16 0 BuiltIn Position\n"
1919 "OpMemberDecorate %16 1 BuiltIn PointSize\n"
1920 "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
1921 "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
1922 "OpDecorate %16 Block\n"
1923 "OpDecorate %29 BuiltIn TessCoord\n"
1924 "OpDecorate %39 Location 0\n"
1925 "OpDecorate %42 Location 0\n"
// Types, constants and interface variables.
1927 "%3 = OpTypeFunction %2\n"
1928 "%6 = OpTypeFloat 32\n"
1929 "%7 = OpTypeVector %6 4\n"
1930 "%8 = OpTypeInt 32 0\n"
1931 "%9 = OpConstant %8 1\n"
1932 "%10 = OpTypeArray %6 %9\n"
1933 "%11 = OpTypeStruct %7 %6 %10 %10\n"
1934 "%12 = OpTypePointer Output %11\n"
1935 "%13 = OpVariable %12 Output\n"
1936 "%14 = OpTypeInt 32 1\n"
1937 "%15 = OpConstant %14 0\n"
1938 "%16 = OpTypeStruct %7 %6 %10 %10\n"
1939 "%17 = OpConstant %8 32\n"
1940 "%18 = OpTypeArray %16 %17\n"
1941 "%19 = OpTypePointer Input %18\n"
1942 "%20 = OpVariable %19 Input\n"
1943 "%21 = OpTypePointer Input %7\n"
1944 "%24 = OpConstant %14 1\n"
1945 "%27 = OpTypeVector %6 3\n"
1946 "%28 = OpTypePointer Input %27\n"
1947 "%29 = OpVariable %28 Input\n"
1948 "%30 = OpConstant %8 0\n"
1949 "%31 = OpTypePointer Input %6\n"
1950 "%36 = OpTypePointer Output %7\n"
1951 "%38 = OpTypePointer Output %6\n"
1952 "%39 = OpVariable %38 Output\n"
1953 "%40 = OpTypeArray %6 %17\n"
1954 "%41 = OpTypePointer Input %40\n"
1955 "%42 = OpVariable %41 Input\n"
// main(): FMix of the two patch positions by gl_TessCoord.x, color pass-through.
1956 "%4 = OpFunction %2 None %3\n"
1958 "%22 = OpAccessChain %21 %20 %15 %15\n"
1959 "%23 = OpLoad %7 %22\n"
1960 "%25 = OpAccessChain %21 %20 %24 %15\n"
1961 "%26 = OpLoad %7 %25\n"
1962 "%32 = OpAccessChain %31 %29 %30\n"
1963 "%33 = OpLoad %6 %32\n"
1964 "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
1965 "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
1966 "%37 = OpAccessChain %36 %13 %15\n"
1968 "%43 = OpAccessChain %31 %42 %15\n"
1969 "%44 = OpLoad %6 %43\n"
// Specializes a GLSL geometry-shader template twice — once with input topology
// "lines" and once with "points" — and registers both variants so the test can
// pick the one matching its primitive topology at pipeline-creation time.
1975 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection)
1977 tcu::StringTemplate geometryTemplate(glslTemplate);
1979 map<string, string> linesParams;
1980 linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
1982 map<string, string> pointsParams;
1983 pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
1985 collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
1986 collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
// SPIR-V assembly overload of the above: substitutes the SPIR-V execution-mode
// names ("InputLines"/"InputPoints") instead of GLSL layout keywords.
1989 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
1991 tcu::StringTemplate geometryTemplate(spirvTemplate);
1993 map<string, string> linesParams;
1994 linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
1996 map<string, string> pointsParams;
1997 pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
1999 collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
2000 collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
// Fills a host-visible allocation backing an SSBO/image with the initial
// contents requested by `data.initializeType`: random values per element type
// (InitializeNonZero), all zeroes (InitializeZero), or nothing (InitializeNone).
// The allocation is flushed afterwards unless no initialization was requested.
// (The `switch (format)` header and some break/close lines are sampled out of
// this listing.)
2003 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
2005 const vk::VkFormat format = data.format;
// Buffer size: element count times the per-element stride, which differs
// between image-backed and buffer-backed data (layout-dependent padding).
2006 const vk::VkDeviceSize size = data.numElements *
2007 (data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2008 if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
// Seeded from the command line so runs are reproducible.
2010 de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2015 DE_FATAL("Illegal buffer format");
// 8-bit integer formats: fill byte-by-byte.
2017 case VK_FORMAT_R8_SINT:
2018 case VK_FORMAT_R8G8_SINT:
2019 case VK_FORMAT_R8G8B8_SINT:
2020 case VK_FORMAT_R8G8B8A8_SINT:
2021 case VK_FORMAT_R8_UINT:
2022 case VK_FORMAT_R8G8_UINT:
2023 case VK_FORMAT_R8G8B8_UINT:
2024 case VK_FORMAT_R8G8B8A8_UINT:
2026 deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
2028 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
2030 ptr[k] = rnd.getUint8();
// 16-bit integer formats.
2034 case VK_FORMAT_R16_SINT:
2035 case VK_FORMAT_R16G16_SINT:
2036 case VK_FORMAT_R16G16B16_SINT:
2037 case VK_FORMAT_R16G16B16A16_SINT:
2038 case VK_FORMAT_R16_UINT:
2039 case VK_FORMAT_R16G16_UINT:
2040 case VK_FORMAT_R16G16B16_UINT:
2041 case VK_FORMAT_R16G16B16A16_UINT:
2043 deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
2045 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
2047 ptr[k] = rnd.getUint16();
// USCALED formats are treated as 32-bit boolean-like values: either the random
// word (odd) or zero, giving a mix of "true" and "false" entries.
2051 case VK_FORMAT_R8_USCALED:
2052 case VK_FORMAT_R8G8_USCALED:
2053 case VK_FORMAT_R8G8B8_USCALED:
2054 case VK_FORMAT_R8G8B8A8_USCALED:
2056 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2058 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2060 deUint32 r = rnd.getUint32();
2061 ptr[k] = (r & 1) ? r : 0;
// 32-bit integer formats.
2065 case VK_FORMAT_R32_SINT:
2066 case VK_FORMAT_R32G32_SINT:
2067 case VK_FORMAT_R32G32B32_SINT:
2068 case VK_FORMAT_R32G32B32A32_SINT:
2069 case VK_FORMAT_R32_UINT:
2070 case VK_FORMAT_R32G32_UINT:
2071 case VK_FORMAT_R32G32B32_UINT:
2072 case VK_FORMAT_R32G32B32A32_UINT:
2074 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2076 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2078 ptr[k] = rnd.getUint32();
// 64-bit integer formats.
2082 case VK_FORMAT_R64_SINT:
2083 case VK_FORMAT_R64G64_SINT:
2084 case VK_FORMAT_R64G64B64_SINT:
2085 case VK_FORMAT_R64G64B64A64_SINT:
2086 case VK_FORMAT_R64_UINT:
2087 case VK_FORMAT_R64G64_UINT:
2088 case VK_FORMAT_R64G64B64_UINT:
2089 case VK_FORMAT_R64G64B64A64_UINT:
2091 deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
2093 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
2095 ptr[k] = rnd.getUint64();
// Half floats: generate 32-bit floats and convert down.
2099 case VK_FORMAT_R16_SFLOAT:
2100 case VK_FORMAT_R16G16_SFLOAT:
2101 case VK_FORMAT_R16G16B16_SFLOAT:
2102 case VK_FORMAT_R16G16B16A16_SFLOAT:
2104 deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
2106 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
2108 ptr[k] = deFloat32To16(rnd.getFloat());
// 32-bit floats.
2112 case VK_FORMAT_R32_SFLOAT:
2113 case VK_FORMAT_R32G32_SFLOAT:
2114 case VK_FORMAT_R32G32B32_SFLOAT:
2115 case VK_FORMAT_R32G32B32A32_SFLOAT:
2117 float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
2119 for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2121 ptr[k] = rnd.getFloat();
// 64-bit floats.
2125 case VK_FORMAT_R64_SFLOAT:
2126 case VK_FORMAT_R64G64_SFLOAT:
2127 case VK_FORMAT_R64G64B64_SFLOAT:
2128 case VK_FORMAT_R64G64B64A64_SFLOAT:
2130 double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
2132 for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2134 ptr[k] = rnd.getDouble();
// InitializeZero: clear the buffer one 32-bit word at a time.
2140 else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2142 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2144 for (vk::VkDeviceSize k = 0; k < size / 4; k++)
// Make the host writes visible to the device (no-op for InitializeNone).
2150 if (subgroups::SSBOData::InitializeNone != data.initializeType)
2152 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
// Maps a graphics shader stage to the descriptor binding index that holds
// that stage's result SSBO (consumed by allStages() when building the
// descriptor set layout).
// NOTE(review): the per-case return values and the default case fall outside
// this extract -- presumably each stage maps to a distinct binding slot;
// confirm against the full source.
2156 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2160 case VK_SHADER_STAGE_VERTEX_BIT:
2163 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2166 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2169 case VK_SHADER_STAGE_GEOMETRY_BIT:
// Runs a framebuffer-based subgroup test through the tessellation pipeline
// (vert -> tesc -> tese -> frag). For each test width it draws 'width'
// one-pixel-wide quads (2 vertices per pixel, patch-list topology), reads the
// rendered image back, and hands the pixels to 'checkResult' together with
// the device's subgroup size.
//   context        - test context supplying device, queue and shader binaries
//   format         - color format of the render target / result image
//   extraData      - optional extra input buffers/images bound to 'shaderStage'
//   extraDataCount - number of entries in extraData
//   checkResult    - per-iteration verification callback
//   shaderStage    - stage flags the extra data bindings are visible to
// Returns pass if every iteration's checkResult succeeds, fail otherwise.
2180 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (
2181 Context& context, VkFormat format, SSBOData* extraData,
2182 deUint32 extraDataCount,
2183 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2184 const VkShaderStageFlags shaderStage)
2186 const DeviceInterface& vk = context.getDeviceInterface();
2187 const VkDevice device = context.getDevice();
2188 const deUint32 maxWidth = getMaxWidth();
2189 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
2190 DescriptorSetLayoutBuilder layoutBuilder;
2191 DescriptorPoolBuilder poolBuilder;
2192 DescriptorSetUpdateBuilder updateBuilder;
2193 Move <VkDescriptorPool> descriptorPool;
2194 Move <VkDescriptorSet> descriptorSet;
// All four shader modules come from pre-built binaries registered under fixed
// names in the context's binary collection.
2196 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device,
2197 context.getBinaryCollection().get("vert"), 0u));
2198 const Unique<VkShaderModule> teCtrlShaderModule (createShaderModule(vk, device,
2199 context.getBinaryCollection().get("tesc"), 0u));
2200 const Unique<VkShaderModule> teEvalShaderModule (createShaderModule(vk, device,
2201 context.getBinaryCollection().get("tese"), 0u));
2202 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device,
2203 context.getBinaryCollection().get("fragment"), 0u));
2204 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
// One vec4 position attribute per vertex, tightly packed.
2206 const VkVertexInputBindingDescription vertexInputBinding =
2209 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
2210 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
2213 const VkVertexInputAttributeDescription vertexInputAttribute =
2217 VK_FORMAT_R32G32B32A32_SFLOAT,
// Create and initialize the caller-supplied extra inputs (image or uniform
// buffer, chosen per element).
2221 for (deUint32 i = 0u; i < extraDataCount; i++)
2223 if (extraData[i].isImage)
2225 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format))
2229 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2230 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2232 const Allocation& alloc = inputBuffers[i]->getAllocation();
2233 initializeMemory(context, alloc, extraData[i]);
// Extra inputs occupy bindings 0..extraDataCount-1, visible to 'shaderStage'.
2236 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2237 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
2239 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
2241 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
// Full tessellation pipeline; no geometry stage (DE_NULL), patch-list input.
2243 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
2244 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
2245 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2246 *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
2247 *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
2249 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2250 poolBuilder.addType(inputBuffers[ndx]->getType());
// Descriptor pool/set are only needed when there is at least one extra input.
2252 if (extraDataCount > 0)
2254 descriptorPool = poolBuilder.build(vk, device,
2255 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2256 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
// Point each binding at its image view + sampler or buffer range.
2259 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2261 if (inputBuffers[buffersNdx]->isImage())
2263 VkDescriptorImageInfo info =
2264 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2265 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2267 updateBuilder.writeSingle(*descriptorSet,
2268 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2269 inputBuffers[buffersNdx]->getType(), &info);
2273 VkDescriptorBufferInfo info =
2274 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2275 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2277 updateBuilder.writeSingle(*descriptorSet,
2278 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2279 inputBuffers[buffersNdx]->getType(), &info);
2283 updateBuilder.update(vk, device);
2285 const VkQueue queue = context.getUniversalQueue();
2286 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2287 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
2288 const deUint32 subgroupSize = getSubgroupSize(context);
2289 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
// Two vertices per pixel column (left and right edge of each quad).
2290 const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
2291 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2292 unsigned totalIterations = 0u;
2293 unsigned failedIterations = 0u;
2294 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
// Fill the vertex buffer once for the maximum width: x positions walk the
// [-1, 1] NDC range in steps of one pixel; each pair brackets one pixel.
2297 const Allocation& alloc = vertexBuffer.getAllocation();
2298 std::vector<tcu::Vec4> data (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
2299 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
2300 float leftHandPosition = -1.0f;
2302 for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
2304 data[ndx][0] = leftHandPosition;
2305 leftHandPosition += pixelSize;
2306 data[ndx+1][0] = leftHandPosition;
2309 deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
2310 flushAlloc(vk, device, alloc);
2313 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2314 const VkViewport viewport = makeViewport(maxWidth, 1u);
2315 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
2316 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2317 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2318 const VkDeviceSize vertexBufferOffset = 0u;
// Main loop: record, submit, and verify once per test width (see
// getNextWidth for the width progression).
2320 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2324 beginCommandBuffer(vk, *cmdBuffer);
2327 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2328 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2330 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2332 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2334 if (extraDataCount > 0)
2336 vk.cmdBindDescriptorSets(*cmdBuffer,
2337 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2338 &descriptorSet.get(), 0u, DE_NULL);
2341 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
// 2 vertices per pixel, so 2*width vertices cover 'width' pixels.
2342 vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
2344 endRenderPass(vk, *cmdBuffer);
2346 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2347 endCommandBuffer(vk, *cmdBuffer);
2349 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
// Read the rendered pixels back on the host and verify.
2353 const Allocation& allocResult = imageBufferResult.getAllocation();
2354 invalidateAlloc(vk, device, allocResult);
2356 std::vector<const void*> datas;
2357 datas.push_back(allocResult.getHostPtr());
// width/2u: checkResult receives the pixel count, not the vertex count.
2358 if (!checkResult(datas, width/2u, subgroupSize))
2363 if (0 < failedIterations)
2365 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2367 context.getTestContext().getLog()
2368 << TestLog::Message << valuesPassed << " / "
2369 << totalIterations << " values passed" << TestLog::EndMessage;
2370 return tcu::TestStatus::fail("Failed!");
2373 return tcu::TestStatus::pass("OK");
// Verifies a result buffer: interprets datas[0] as an array of deUint32 and
// scans 'width' elements against the expected value 'ref'.
// NOTE(review): the loop body and final return fall outside this extract --
// presumably every element must equal 'ref' for the check to pass; confirm
// against the full source.
2376 bool vkt::subgroups::check(std::vector<const void*> datas,
2377 deUint32 width, deUint32 ref)
2379 const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2381 for (deUint32 n = 0; n < width; ++n)
// Compute-shader variant of check(): derives the total invocation count from
// the dispatch dimensions (workgroup count x local size, per axis) and
// delegates the element-wise comparison against the reference value to
// check().
2392 bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
2393 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
// Global size per axis = workgroups * invocations-per-workgroup.
2396 const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2397 const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2398 const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2400 return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
// Runs a framebuffer-based subgroup test through the geometry-shader pipeline
// (vert -> geom -> frag). For each test width it draws 'width' points (one
// per pixel, point-list topology), reads the rendered row back, and hands the
// pixels to 'checkResult' together with the device's subgroup size.
//   context        - test context supplying device, queue and shader binaries
//   format         - color format of the render target / result image
//   extraData      - optional extra inputs bound to the geometry stage
//   extraDataCount - number of entries in extraData
//   checkResult    - per-iteration verification callback
// Returns pass if every iteration's checkResult succeeds, fail otherwise.
2403 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
2404 Context& context, VkFormat format, SSBOData* extraData,
2405 deUint32 extraDataCount,
2406 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2408 const DeviceInterface& vk = context.getDeviceInterface();
2409 const VkDevice device = context.getDevice();
2410 const deUint32 maxWidth = getMaxWidth();
2411 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
2412 DescriptorSetLayoutBuilder layoutBuilder;
2413 DescriptorPoolBuilder poolBuilder;
2414 DescriptorSetUpdateBuilder updateBuilder;
2415 Move <VkDescriptorPool> descriptorPool;
2416 Move <VkDescriptorSet> descriptorSet;
// Shader modules come from pre-built binaries in the binary collection.
2418 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2419 const Unique<VkShaderModule> geometryShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2420 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2421 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
// One vec4 position attribute per vertex, tightly packed.
2422 const VkVertexInputBindingDescription vertexInputBinding =
2425 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
2426 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
2429 const VkVertexInputAttributeDescription vertexInputAttribute =
2433 VK_FORMAT_R32G32B32A32_SFLOAT,
// Create and initialize the caller-supplied extra inputs (image or uniform
// buffer, chosen per element).
2437 for (deUint32 i = 0u; i < extraDataCount; i++)
2439 if (extraData[i].isImage)
2441 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2445 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2446 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2448 const Allocation& alloc = inputBuffers[i]->getAllocation();
2449 initializeMemory(context, alloc, extraData[i]);
// Extra inputs occupy bindings 0..extraDataCount-1, geometry stage only.
2452 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2453 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2455 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
2457 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
// Vertex + geometry + fragment; no tessellation (DE_NULL), point-list input.
2459 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
2460 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
2461 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
2462 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
2464 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2465 poolBuilder.addType(inputBuffers[ndx]->getType());
// Descriptor pool/set are only needed when there is at least one extra input.
2467 if (extraDataCount > 0)
2469 descriptorPool = poolBuilder.build(vk, device,
2470 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2471 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
// Point each binding at its image view + sampler or buffer range.
2474 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2476 if (inputBuffers[buffersNdx]->isImage())
2478 VkDescriptorImageInfo info =
2479 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2480 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2482 updateBuilder.writeSingle(*descriptorSet,
2483 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2484 inputBuffers[buffersNdx]->getType(), &info);
2488 VkDescriptorBufferInfo info =
2489 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2490 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2492 updateBuilder.writeSingle(*descriptorSet,
2493 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2494 inputBuffers[buffersNdx]->getType(), &info);
2498 updateBuilder.update(vk, device);
2500 const VkQueue queue = context.getUniversalQueue();
2501 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2502 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
2503 const deUint32 subgroupSize = getSubgroupSize(context);
2504 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
// One point vertex per pixel column.
2505 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
2506 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2507 unsigned totalIterations = 0u;
2508 unsigned failedIterations = 0u;
2509 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
// Fill the vertex buffer once for the maximum width: each point sits at the
// center of its pixel in NDC ([-1, 1] split into maxWidth columns).
2512 const Allocation& alloc = vertexBuffer.getAllocation();
2513 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2514 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
2515 float leftHandPosition = -1.0f;
2517 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2519 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2520 leftHandPosition += pixelSize;
2523 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2524 flushAlloc(vk, device, alloc);
2527 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2528 const VkViewport viewport = makeViewport(maxWidth, 1u);
2529 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
2530 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2531 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2532 const VkDeviceSize vertexBufferOffset = 0u;
// Main loop: record, submit, and verify once per test width.
2534 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
// Re-randomize/reset the extra inputs before each iteration.
2538 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2540 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2541 initializeMemory(context, alloc, extraData[ndx]);
2544 beginCommandBuffer(vk, *cmdBuffer);
2546 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2548 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2550 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2552 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2554 if (extraDataCount > 0)
2556 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2557 &descriptorSet.get(), 0u, DE_NULL);
2560 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2562 vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2564 endRenderPass(vk, *cmdBuffer);
2566 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2568 endCommandBuffer(vk, *cmdBuffer);
2570 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
// Read the rendered pixels back on the host and verify.
2574 const Allocation& allocResult = imageBufferResult.getAllocation();
2575 invalidateAlloc(vk, device, allocResult);
2577 std::vector<const void*> datas;
2578 datas.push_back(allocResult.getHostPtr());
2579 if (!checkResult(datas, width, subgroupSize))
2584 if (0 < failedIterations)
2586 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2588 context.getTestContext().getLog()
2589 << TestLog::Message << valuesPassed << " / "
2590 << totalIterations << " values passed" << TestLog::EndMessage;
2592 return tcu::TestStatus::fail("Failed!");
2595 return tcu::TestStatus::pass("OK");
// Runs a subgroup test across an arbitrary combination of graphics stages.
// Builds a pipeline containing every stage in 'shaderStageTested' plus any
// stages structurally required to make the pipeline valid (e.g. a vertex
// stage when only tessellation is tested); the filler stages use
// "*_noSubgroup" shader variants so they do not contribute results. Each
// tested stage writes into its own result SSBO (binding chosen by
// getResultBinding); the fragment stage's results are read from the rendered
// image instead.
//   context           - test context supplying device, queue, shader binaries
//   format            - element format of the per-stage result buffers
//   extraDatas        - optional extra inputs with explicit stage/binding info
//   extraDatasCount   - number of entries in extraDatas
//   checkResult       - per-stage, per-iteration verification callback
//   shaderStageTested - stages whose subgroup behavior is being verified
// Returns pass if every stage passes at every width, fail otherwise.
2599 tcu::TestStatus vkt::subgroups::allStages(
2600 Context& context, VkFormat format, SSBOData* extraDatas,
2601 deUint32 extraDatasCount,
2602 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2603 const VkShaderStageFlags shaderStageTested)
2605 const DeviceInterface& vk = context.getDeviceInterface();
2606 const VkDevice device = context.getDevice();
2607 const deUint32 maxWidth = getMaxWidth();
2608 vector<VkShaderStageFlagBits> stagesVector;
// Stages that must exist for pipeline validity but are NOT under test.
2609 VkShaderStageFlags shaderStageRequired = (VkShaderStageFlags)0ull;
2611 Move<VkShaderModule> vertexShaderModule;
2612 Move<VkShaderModule> teCtrlShaderModule;
2613 Move<VkShaderModule> teEvalShaderModule;
2614 Move<VkShaderModule> geometryShaderModule;
2615 Move<VkShaderModule> fragmentShaderModule;
2617 if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
2619 stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
// Tessellation control requires both an evaluation stage and a vertex stage;
// add whichever of those is not already being tested.
2621 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
2623 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
2624 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
2625 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
// Likewise tessellation evaluation needs a control stage and a vertex stage.
2627 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2629 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
2630 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
2631 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2633 if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
2635 stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
2636 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
2637 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
// Fragment stage is verified via the rendered image, so it is not pushed
// into stagesVector; it still needs a vertex stage to feed it.
2639 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
2641 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
2642 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
2645 const deUint32 stagesCount = static_cast<deUint32>(stagesVector.size());
// Filler stages use the "*_noSubgroup" shader variants.
2646 const string vert = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
2647 const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
2648 const string tese = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";
// From here on, shaderStageRequired = full set of stages in the pipeline.
2650 shaderStageRequired = shaderStageTested | shaderStageRequired;
2652 vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
2653 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
2655 teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
2656 teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
2658 if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
2660 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2662 // tessellation shaders output line primitives
2663 geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
2667 // otherwise points are processed by geometry shader
2668 geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
2671 if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
2672 fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
// Layout: first stagesCount result SSBOs (one per tested non-fragment
// stage), then the caller-supplied extra inputs.
2674 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
2676 DescriptorSetLayoutBuilder layoutBuilder;
2677 // The implicit result SSBO we use to store our outputs from the shader
2678 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
// Tessellation evaluation emits two results per pixel (line endpoints), so
// its result buffer is twice as large.
2680 const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
2681 const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
2682 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2684 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
2687 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
2689 const deUint32 datasNdx = ndx - stagesCount;
2690 if (extraDatas[datasNdx].isImage)
2692 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
2696 const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
2697 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2700 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2701 initializeMemory(context, alloc, extraDatas[datasNdx]);
// Extra inputs use the stage mask and binding the caller specified.
2703 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
2704 extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
2707 const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
2709 const Unique<VkPipelineLayout> pipelineLayout(
2710 makePipelineLayout(vk, device, *descriptorSetLayout));
2712 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
// Patch-list topology whenever tessellation is present, point-list otherwise.
2713 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
2714 shaderStageRequired,
2715 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
2717 (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
2719 Move <VkDescriptorPool> descriptorPool;
2720 Move <VkDescriptorSet> descriptorSet;
2722 if (inputBuffers.size() > 0)
2724 DescriptorPoolBuilder poolBuilder;
2726 for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
2728 poolBuilder.addType(inputBuffers[ndx]->getType());
2731 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2733 // Create descriptor set
2734 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2736 DescriptorSetUpdateBuilder updateBuilder;
2738 for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
// Result buffers use the per-stage binding; extras use their own binding.
2741 if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
2742 else binding = extraDatas[ndx -stagesCount].binding;
2744 if (inputBuffers[ndx]->isImage())
2746 VkDescriptorImageInfo info =
2747 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
2748 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2750 updateBuilder.writeSingle( *descriptorSet,
2751 DescriptorSetUpdateBuilder::Location::binding(binding),
2752 inputBuffers[ndx]->getType(), &info);
2756 VkDescriptorBufferInfo info =
2757 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
2758 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
2760 updateBuilder.writeSingle( *descriptorSet,
2761 DescriptorSetUpdateBuilder::Location::binding(binding),
2762 inputBuffers[ndx]->getType(), &info);
2766 updateBuilder.update(vk, device);
2770 const VkQueue queue = context.getUniversalQueue();
2771 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2772 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
2773 const deUint32 subgroupSize = getSubgroupSize(context);
2774 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
2775 unsigned totalIterations = 0u;
2776 unsigned failedIterations = 0u;
2777 Image resultImage (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2778 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
2779 const VkViewport viewport = makeViewport(maxWidth, 1u);
2780 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
2781 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2782 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2783 const VkImageSubresourceRange subresourceRange =
2785 VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
2786 0u, //deUint32 baseMipLevel
2787 1u, //deUint32 levelCount
2788 0u, //deUint32 baseArrayLayer
2789 1u //deUint32 layerCount
// Transition the color attachment UNDEFINED -> COLOR_ATTACHMENT_OPTIMAL
// before the first use each iteration.
2792 const VkImageMemoryBarrier colorAttachmentBarrier = makeImageMemoryBarrier(
2793 (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
2794 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2795 resultImage.getImage(), subresourceRange);
// Main loop: record, submit, and verify once per test width.
2797 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
// Re-randomize/reset the extra inputs before each iteration.
2799 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
2802 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2803 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
2808 beginCommandBuffer(vk, *cmdBuffer);
2810 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
2812 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2814 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2816 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2818 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2820 if (stagesCount + extraDatasCount > 0)
2821 vk.cmdBindDescriptorSets(*cmdBuffer,
2822 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2823 &descriptorSet.get(), 0u, DE_NULL);
2825 vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
2827 endRenderPass(vk, *cmdBuffer);
2829 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2831 endCommandBuffer(vk, *cmdBuffer);
2833 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
// Verify each tested non-fragment stage from its result SSBO; the stage's
// own results come first in 'datas', followed by any extra buffers visible
// to that stage.
2835 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
2837 std::vector<const void*> datas;
2838 if (!inputBuffers[ndx]->isImage())
2840 const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
2841 invalidateAlloc(vk, device, resultAlloc);
2842 // we always have our result data first
2843 datas.push_back(resultAlloc.getHostPtr());
2846 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2848 const deUint32 datasNdx = index - stagesCount;
2849 if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
2851 const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
2852 invalidateAlloc(vk, device, resultAlloc);
2853 // we always have our result data first
2854 datas.push_back(resultAlloc.getHostPtr());
// Tess-eval produced two results per drawn point (see buffer sizing above).
2858 if (!checkResult(datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
// Fragment-stage results are read back from the rendered image.
2861 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
2863 std::vector<const void*> datas;
2864 const Allocation& resultAlloc = imageBufferResult.getAllocation();
2865 invalidateAlloc(vk, device, resultAlloc);
2867 // we always have our result data first
2868 datas.push_back(resultAlloc.getHostPtr());
2870 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2872 const deUint32 datasNdx = index - stagesCount;
2873 if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
2875 const Allocation& alloc = inputBuffers[index]->getAllocation();
2876 invalidateAlloc(vk, device, alloc);
2877 // we always have our result data first
2878 datas.push_back(alloc.getHostPtr());
2882 if (!checkResult(datas, width, subgroupSize))
// Command buffer is reused across iterations, so reset it explicitly.
2886 vk.resetCommandBuffer(*cmdBuffer, 0);
2889 if (0 < failedIterations)
2891 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2893 context.getTestContext().getLog()
2894 << TestLog::Message << valuesPassed << " / "
2895 << totalIterations << " values passed" << TestLog::EndMessage;
2897 return tcu::TestStatus::fail("Failed!");
2901 return tcu::TestStatus::pass("OK");
2904 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
2905 SSBOData* extraData, deUint32 extraDataCount,
2906 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2908 const DeviceInterface& vk = context.getDeviceInterface();
2909 const VkDevice device = context.getDevice();
2910 const VkQueue queue = context.getUniversalQueue();
2911 const deUint32 maxWidth = getMaxWidth();
2912 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2913 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
2914 DescriptorSetLayoutBuilder layoutBuilder;
// NOTE(review): this is the tail of a draw-based subgroup framebuffer test
// helper whose signature and opening statements precede this chunk; the
// comments below annotate only what is visible here. Brace-only lines were
// elided by the listing.
2915 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2916 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2917 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
// One tcu::Vec4 per vertex, advancing per-vertex; positions are written below.
2919 const VkVertexInputBindingDescription vertexInputBinding =
2922 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
2923 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
2926 const VkVertexInputAttributeDescription vertexInputAttribute =
2930 VK_FORMAT_R32G32B32A32_SFLOAT,
// Each extraData entry becomes either an image or a uniform buffer, then gets
// its initial contents uploaded.
2934 for (deUint32 i = 0u; i < extraDataCount; i++)
2936 if (extraData[i].isImage)
2938 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2942 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2943 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2945 const Allocation& alloc = inputBuffers[i]->getAllocation();
2946 initializeMemory(context, alloc, extraData[i]);
// Binding ndx of the vertex stage mirrors extraData[ndx].
2949 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2950 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
2952 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
2954 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
// Point-list topology: one point primitive per vertex/invocation.
2956 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
2957 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
2958 *vertexShaderModule, *fragmentShaderModule,
2959 DE_NULL, DE_NULL, DE_NULL,
2960 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
2961 &vertexInputBinding, &vertexInputAttribute, true, format));
2962 DescriptorPoolBuilder poolBuilder;
2963 DescriptorSetUpdateBuilder updateBuilder;
2966 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2967 poolBuilder.addType(inputBuffers[ndx]->getType());
2969 Move <VkDescriptorPool> descriptorPool;
2970 Move <VkDescriptorSet> descriptorSet;
// Pool/set are only created when there is at least one extra input to bind.
2972 if (extraDataCount > 0)
2974 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2975 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2978 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2980 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2981 initializeMemory(context, alloc, extraData[ndx]);
// Write one descriptor per input: combined image or uniform buffer.
2984 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2986 if (inputBuffers[buffersNdx]->isImage())
2988 VkDescriptorImageInfo info =
2989 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2990 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2992 updateBuilder.writeSingle(*descriptorSet,
2993 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2994 inputBuffers[buffersNdx]->getType(), &info);
2998 VkDescriptorBufferInfo info =
2999 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
3000 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3002 updateBuilder.writeSingle(*descriptorSet,
3003 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3004 inputBuffers[buffersNdx]->getType(), &info);
3007 updateBuilder.update(vk, device);
3009 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3011 const deUint32 subgroupSize = getSubgroupSize(context);
3013 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
// Vertex buffer holds maxWidth vec4 positions, one per potential invocation.
3015 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
3016 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3018 unsigned totalIterations = 0u;
3019 unsigned failedIterations = 0u;
// Render target is only read back through the copy below, never presented.
3021 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3024 const Allocation& alloc = vertexBuffer.getAllocation();
3025 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
// Place point ndx at the center of pixel ndx in NDC x ([-1, 1] across maxWidth pixels).
3026 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
3027 float leftHandPosition = -1.0f;
3029 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3031 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3032 leftHandPosition += pixelSize;
3035 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3036 flushAlloc(vk, device, alloc);
3039 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3040 const VkViewport viewport = makeViewport(maxWidth, 1u);
3041 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
3042 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3043 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3044 const VkDeviceSize vertexBufferOffset = 0u;
// Iterate draw widths via getNextWidth (every value up to 128, then powers of two).
3046 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
// Re-initialize inputs so every iteration starts from identical data.
3050 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3052 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3053 initializeMemory(context, alloc, extraData[ndx]);
3056 beginCommandBuffer(vk, *cmdBuffer);
3058 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3060 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3062 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3064 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3066 if (extraDataCount > 0)
3068 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3069 &descriptorSet.get(), 0u, DE_NULL);
3072 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
// Draw 'width' points -> 'width' vertex shader invocations for this iteration.
3074 vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3076 endRenderPass(vk, *cmdBuffer);
3078 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3080 endCommandBuffer(vk, *cmdBuffer);
3082 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3086 const Allocation& allocResult = imageBufferResult.getAllocation();
3087 invalidateAlloc(vk, device, allocResult);
// checkResult validates the read-back pixels against the current draw width.
3089 std::vector<const void*> datas;
3090 datas.push_back(allocResult.getHostPtr());
3091 if (!checkResult(datas, width, subgroupSize))
// Report pass/fail counts; totalIterations/failedIterations are presumably
// incremented on lines elided from this listing — verify against the full file.
3096 if (0 < failedIterations)
3098 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3100 context.getTestContext().getLog()
3101 << TestLog::Message << valuesPassed << " / "
3102 << totalIterations << " values passed" << TestLog::EndMessage;
3104 return tcu::TestStatus::fail("Failed!");
3107 return tcu::TestStatus::pass("OK");
3111 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context& context, VkFormat format, SSBOData* extraDatas,
3112 deUint32 extraDatasCount,
3113 bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
3114 deUint32 height, deUint32 subgroupSize))
3116 const DeviceInterface& vk = context.getDeviceInterface();
3117 const VkDevice device = context.getDevice();
3118 const VkQueue queue = context.getUniversalQueue();
3119 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3120 const Unique<VkShaderModule> vertexShaderModule (createShaderModule
3121 (vk, device, context.getBinaryCollection().get("vert"), 0u));
3122 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule
3123 (vk, device, context.getBinaryCollection().get("fragment"), 0u));
3125 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
3127 for (deUint32 i = 0; i < extraDatasCount; i++)
3129 if (extraDatas[i].isImage)
3131 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3132 static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3136 vk::VkDeviceSize size =
3137 getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3138 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3141 const Allocation& alloc = inputBuffers[i]->getAllocation();
3142 initializeMemory(context, alloc, extraDatas[i]);
3145 DescriptorSetLayoutBuilder layoutBuilder;
3147 for (deUint32 i = 0; i < extraDatasCount; i++)
3149 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
3150 VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3153 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3154 layoutBuilder.build(vk, device));
3156 const Unique<VkPipelineLayout> pipelineLayout(
3157 makePipelineLayout(vk, device, *descriptorSetLayout));
3159 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3160 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3161 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3162 *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3163 DE_NULL, DE_NULL, true));
3165 DescriptorPoolBuilder poolBuilder;
3167 // To stop validation complaining, always add at least one type to pool.
3168 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3169 for (deUint32 i = 0; i < extraDatasCount; i++)
3171 poolBuilder.addType(inputBuffers[i]->getType());
3174 Move<VkDescriptorPool> descriptorPool;
3175 // Create descriptor set
3176 Move<VkDescriptorSet> descriptorSet;
3178 if (extraDatasCount > 0)
3180 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3182 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3185 DescriptorSetUpdateBuilder updateBuilder;
3187 for (deUint32 i = 0; i < extraDatasCount; i++)
3189 if (inputBuffers[i]->isImage())
3191 VkDescriptorImageInfo info =
3192 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3193 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3195 updateBuilder.writeSingle(*descriptorSet,
3196 DescriptorSetUpdateBuilder::Location::binding(i),
3197 inputBuffers[i]->getType(), &info);
3201 VkDescriptorBufferInfo info =
3202 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
3203 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3205 updateBuilder.writeSingle(*descriptorSet,
3206 DescriptorSetUpdateBuilder::Location::binding(i),
3207 inputBuffers[i]->getType(), &info);
3211 if (extraDatasCount > 0)
3212 updateBuilder.update(vk, device);
3214 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3216 const deUint32 subgroupSize = getSubgroupSize(context);
3218 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3220 unsigned totalIterations = 0;
3221 unsigned failedIterations = 0;
3223 for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3225 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3230 for (deUint32 i = 0; i < extraDatasCount; i++)
3232 const Allocation& alloc = inputBuffers[i]->getAllocation();
3233 initializeMemory(context, alloc, extraDatas[i]);
3236 VkDeviceSize formatSize = getFormatSizeInBytes(format);
3237 const VkDeviceSize resultImageSizeInBytes =
3238 width * height * formatSize;
3240 Image resultImage(context, width, height, format,
3241 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
3242 VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3244 Buffer resultBuffer(context, resultImageSizeInBytes,
3245 VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3247 const Unique<VkFramebuffer> framebuffer(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3249 beginCommandBuffer(vk, *cmdBuffer);
3251 VkViewport viewport = makeViewport(width, height);
3254 *cmdBuffer, 0, 1, &viewport);
3256 VkRect2D scissor = {{0, 0}, {width, height}};
3259 *cmdBuffer, 0, 1, &scissor);
3261 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3264 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3266 if (extraDatasCount > 0)
3268 vk.cmdBindDescriptorSets(*cmdBuffer,
3269 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3270 &descriptorSet.get(), 0u, DE_NULL);
3273 vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3275 endRenderPass(vk, *cmdBuffer);
3277 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3279 endCommandBuffer(vk, *cmdBuffer);
3281 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3283 std::vector<const void*> datas;
3285 const Allocation& resultAlloc = resultBuffer.getAllocation();
3286 invalidateAlloc(vk, device, resultAlloc);
3288 // we always have our result data first
3289 datas.push_back(resultAlloc.getHostPtr());
3292 if (!checkResult(datas, width, height, subgroupSize))
3297 vk.resetCommandBuffer(*cmdBuffer, 0);
3301 if (0 < failedIterations)
3303 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3305 context.getTestContext().getLog()
3306 << TestLog::Message << valuesPassed << " / "
3307 << totalIterations << " values passed" << TestLog::EndMessage;
3309 return tcu::TestStatus::fail("Failed!");
3312 return tcu::TestStatus::pass("OK");
// Runs a compute-stage subgroup test: binds a storage buffer for results at
// binding 0 plus one descriptor per input, then dispatches a 4x2x2 workgroup
// grid once per tested local size and hands the read-back data to checkResult.
// NOTE(review): brace-only lines and some entries of localSizesToTest were
// elided from this listing; comments below only describe visible code.
3315 tcu::TestStatus vkt::subgroups::makeComputeTest(
3316 Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
3317 bool (*checkResult)(std::vector<const void*> datas,
3318 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
3319 deUint32 subgroupSize))
3321 const DeviceInterface& vk = context.getDeviceInterface();
3322 const VkDevice device = context.getDevice();
3323 const VkQueue queue = context.getUniversalQueue();
3324 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3325 VkDeviceSize elementSize = getFormatSizeInBytes(format);
// Result buffer sized for the largest local size in any single dimension,
// cubed, so every tested local size fits in the same buffer.
3327 const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
3328 maxSupportedSubgroupSize() *
3329 maxSupportedSubgroupSize();
3330 const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
3332 Buffer resultBuffer(
3333 context, resultBufferSizeInBytes);
3335 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
// Create an image or a buffer per input and upload its initial contents.
3337 for (deUint32 i = 0; i < inputsCount; i++)
3339 if (inputs[i].isImage)
3341 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3342 static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
3346 vk::VkDeviceSize size =
3347 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3348 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3351 const Allocation& alloc = inputBuffers[i]->getAllocation();
3352 initializeMemory(context, alloc, inputs[i]);
// Binding 0 is the result buffer; inputs occupy bindings 1..inputsCount.
3355 DescriptorSetLayoutBuilder layoutBuilder;
3356 layoutBuilder.addBinding(
3357 resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3359 for (deUint32 i = 0; i < inputsCount; i++)
3361 layoutBuilder.addBinding(
3362 inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3365 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3366 layoutBuilder.build(vk, device));
3368 const Unique<VkShaderModule> shaderModule(
3369 createShaderModule(vk, device,
3370 context.getBinaryCollection().get("comp"), 0u));
3371 const Unique<VkPipelineLayout> pipelineLayout(
3372 makePipelineLayout(vk, device, *descriptorSetLayout));
3374 DescriptorPoolBuilder poolBuilder;
3376 poolBuilder.addType(resultBuffer.getType());
3378 for (deUint32 i = 0; i < inputsCount; i++)
3380 poolBuilder.addType(inputBuffers[i]->getType());
3383 const Unique<VkDescriptorPool> descriptorPool(
3384 poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3386 // Create descriptor set
3387 const Unique<VkDescriptorSet> descriptorSet(
3388 makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
3390 DescriptorSetUpdateBuilder updateBuilder;
3392 const VkDescriptorBufferInfo resultDescriptorInfo =
3393 makeDescriptorBufferInfo(
3394 resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
3396 updateBuilder.writeSingle(*descriptorSet,
3397 DescriptorSetUpdateBuilder::Location::binding(0u),
3398 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
// Inputs are offset by one binding because binding 0 holds the results.
3400 for (deUint32 i = 0; i < inputsCount; i++)
3402 if (inputBuffers[i]->isImage())
3404 VkDescriptorImageInfo info =
3405 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3406 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3408 updateBuilder.writeSingle(*descriptorSet,
3409 DescriptorSetUpdateBuilder::Location::binding(i + 1),
3410 inputBuffers[i]->getType(), &info);
3414 vk::VkDeviceSize size =
3415 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3416 VkDescriptorBufferInfo info =
3417 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
3419 updateBuilder.writeSingle(*descriptorSet,
3420 DescriptorSetUpdateBuilder::Location::binding(i + 1),
3421 inputBuffers[i]->getType(), &info);
3425 updateBuilder.update(vk, device);
3427 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3429 unsigned totalIterations = 0;
3430 unsigned failedIterations = 0;
3432 const deUint32 subgroupSize = getSubgroupSize(context);
3434 const Unique<VkCommandBuffer> cmdBuffer(
3435 makeCommandBuffer(context, *cmdPool));
// Fixed 4x2x2 workgroup grid for every dispatch below.
3437 const deUint32 numWorkgroups[3] = {4, 2, 2};
// Local sizes under test; additional entries were elided from this listing.
// The subgroupSize entries exercise a full subgroup along each axis in turn.
3439 const deUint32 localSizesToTestCount = 8;
3440 deUint32 localSizesToTest[localSizesToTestCount][3] =
3443 {subgroupSize, 1, 1},
3444 {1, subgroupSize, 1},
3445 {1, 1, subgroupSize},
3449 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
// One pipeline per tested local size (the last array entry is unused, hence -1).
3452 Move<VkPipeline> pipelines[localSizesToTestCount - 1];
// Base pipeline allows derivatives; presumably assigned to pipelines[0] on a
// line elided from this listing — verify against the full file.
3454 makeComputePipeline(context, *pipelineLayout, *shaderModule,
3455 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT, (VkPipeline) DE_NULL,
3456 localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]);
// Remaining pipelines are built as derivatives of the base pipeline.
3458 for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
3460 const deUint32 nextX = localSizesToTest[index][0];
3461 const deUint32 nextY = localSizesToTest[index][1];
3462 const deUint32 nextZ = localSizesToTest[index][2];
3465 makeComputePipeline(context, *pipelineLayout, *shaderModule,
3466 VK_PIPELINE_CREATE_DERIVATIVE_BIT, *pipelines[0],
3467 nextX, nextY, nextZ);
// Record, dispatch and validate once per tested local size.
3470 for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
3473 // we are running one test
3476 beginCommandBuffer(vk, *cmdBuffer);
3478 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelines[index]);
3480 vk.cmdBindDescriptorSets(*cmdBuffer,
3481 VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
3482 &descriptorSet.get(), 0u, DE_NULL);
3484 vk.cmdDispatch(*cmdBuffer,numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
3486 endCommandBuffer(vk, *cmdBuffer);
3488 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3490 std::vector<const void*> datas;
3493 const Allocation& resultAlloc = resultBuffer.getAllocation();
3494 invalidateAlloc(vk, device, resultAlloc);
3496 // we always have our result data first
3497 datas.push_back(resultAlloc.getHostPtr());
// Buffer inputs are also read back so checkResult can inspect them;
// image inputs are not host-visible here and are skipped.
3500 for (deUint32 i = 0; i < inputsCount; i++)
3502 if (!inputBuffers[i]->isImage())
3504 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
3505 invalidateAlloc(vk, device, resultAlloc);
3507 // we always have our result data first
3508 datas.push_back(resultAlloc.getHostPtr());
3512 if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
3517 vk.resetCommandBuffer(*cmdBuffer, 0);
// Summarize: report pass counts and fail if any iteration failed.
3520 if (0 < failedIterations)
3522 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3524 context.getTestContext().getLog()
3525 << TestLog::Message << valuesPassed << " / "
3526 << totalIterations << " values passed" << TestLog::EndMessage;
3528 return tcu::TestStatus::fail("Failed!");
3531 return tcu::TestStatus::pass("OK");