1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2017 The Khronos Group Inc.
6 * Copyright (c) 2017 Codeplay Software Ltd.
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * \brief Subgroups Tests Utils
23 */ /*--------------------------------------------------------------------*/
25 #include "vktSubgroupsTestsUtils.hpp"
26 #include "deRandom.hpp"
27 #include "tcuCommandLine.hpp"
28 #include "tcuStringTemplate.hpp"
29 #include "vkBarrierUtil.hpp"
30 #include "vkImageUtil.hpp"
31 #include "vkTypeUtil.hpp"
32 #include "vkCmdUtil.hpp"
33 #include "vkObjUtil.hpp"
// Maps a VkFormat to a size in bytes.
// Formats that are not handled raise DE_FATAL("Unhandled format!")
// (presumably from the switch's default branch -- confirm).
// In the integer cases visible below, the three-component formats share the
// return value of the four-component ones (sizeof(deInt32) * 4).
42 deUint32 getFormatSizeInBytes(const VkFormat format)
47 DE_FATAL("Unhandled format!");
49 case VK_FORMAT_R32_SINT:
50 case VK_FORMAT_R32_UINT:
51 return sizeof(deInt32);
52 case VK_FORMAT_R32G32_SINT:
53 case VK_FORMAT_R32G32_UINT:
54 return static_cast<deUint32>(sizeof(deInt32) * 2);
55 case VK_FORMAT_R32G32B32_SINT:
56 case VK_FORMAT_R32G32B32_UINT:
57 case VK_FORMAT_R32G32B32A32_SINT:
58 case VK_FORMAT_R32G32B32A32_UINT:
59 return static_cast<deUint32>(sizeof(deInt32) * 4);
60 case VK_FORMAT_R32_SFLOAT:
62 case VK_FORMAT_R32G32_SFLOAT:
64 case VK_FORMAT_R32G32B32_SFLOAT:
66 case VK_FORMAT_R32G32B32A32_SFLOAT:
68 case VK_FORMAT_R64_SFLOAT:
70 case VK_FORMAT_R64G64_SFLOAT:
72 case VK_FORMAT_R64G64B64_SFLOAT:
74 case VK_FORMAT_R64G64B64A64_SFLOAT:
76 // The below formats are used to represent bool and bvec* types. These
77 // types are passed to the shader as int and ivec* types, before the
78 // calculations are done as booleans. We need a distinct type here so
79 // that the shader generators can switch on it and generate the correct
80 // shader source for testing.
// Consequently the sizes returned here are multiples of sizeof(deInt32),
// not the 8-bit channel sizes the format names would suggest.
81 case VK_FORMAT_R8_USCALED:
82 return sizeof(deInt32);
83 case VK_FORMAT_R8G8_USCALED:
84 return static_cast<deUint32>(sizeof(deInt32) * 2);
85 case VK_FORMAT_R8G8B8_USCALED:
86 case VK_FORMAT_R8G8B8A8_USCALED:
87 return static_cast<deUint32>(sizeof(deInt32) * 4);
// Creates a pipeline layout on the context's device that references exactly
// one descriptor set layout (descriptorSetLayout) and declares no push
// constant ranges.
91 Move<VkPipelineLayout> makePipelineLayout(
92 Context& context, const VkDescriptorSetLayout descriptorSetLayout)
94 const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams = {
95 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
96 DE_NULL, // const void* pNext;
97 0u, // VkPipelineLayoutCreateFlags flags;
98 1u, // deUint32 setLayoutCount;
99 &descriptorSetLayout, // const VkDescriptorSetLayout* pSetLayouts;
100 0u, // deUint32 pushConstantRangeCount;
101 DE_NULL, // const VkPushConstantRange* pPushConstantRanges;
103 return createPipelineLayout(context.getDeviceInterface(),
104 context.getDevice(), &pipelineLayoutParams);
// Creates a render pass with one color attachment of the given format and a
// single graphics subpass that writes to it.
// The attachment is cleared on load and stored at the end; it starts in
// VK_IMAGE_LAYOUT_UNDEFINED and finishes in TRANSFER_SRC_OPTIMAL.
107 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
// Attachment 0 is used as the color target while the subpass runs.
109 VkAttachmentReference colorReference = {
110 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
// One color attachment; no input, resolve, depth/stencil or preserve attachments.
113 const VkSubpassDescription subpassDescription = {0u,
114 VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
115 DE_NULL, DE_NULL, 0, DE_NULL
// Two by-region dependencies: external -> subpass 0, and subpass 0 -> external.
118 const VkSubpassDependency subpassDependencies[2] = {
119 { VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
120 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
121 VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
122 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
123 VK_DEPENDENCY_BY_REGION_BIT
125 { 0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
126 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
127 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
128 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
129 VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
// Single-sampled attachment; stencil load/store ops are both DONT_CARE.
133 VkAttachmentDescription attachmentDescription = {0u, format,
134 VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
135 VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
136 VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
137 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
// 1 attachment, 1 subpass, 2 dependencies.
140 const VkRenderPassCreateInfo renderPassCreateInfo = {
141 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
142 &attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
145 return createRenderPass(context.getDeviceInterface(), context.getDevice(),
146 &renderPassCreateInfo);
// Creates a framebuffer for renderPass with imageView as its only attachment,
// sized width x height with a single layer.
149 Move<VkFramebuffer> makeFramebuffer(Context& context,
150 const VkRenderPass renderPass, const VkImageView imageView, deUint32 width,
153 const VkFramebufferCreateInfo framebufferCreateInfo = {
154 VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, DE_NULL, 0u, renderPass, 1,
155 &imageView, width, height, 1
158 return createFramebuffer(context.getDeviceInterface(), context.getDevice(),
159 &framebufferCreateInfo);
// Builds a graphics pipeline from the supplied shader modules by delegating
// to vk::makeGraphicsPipeline.
//
// Set up locally before delegating:
//  - a vertex input state with zero or one binding/attribute description,
//    depending on whether the corresponding pointer is DE_NULL;
//  - a color component mask derived from the number of channels used by
//    attachmentFormat;
//  - a color blend state with one attachment and logic ops disabled;
//  - patchControlPoints, which is 2 when stages contains the fragment bit and
//    frameBufferTests is true, otherwise 1.
// Rasterization, multisample and depth/stencil state are passed as DE_NULL.
// NOTE(review): the empty viewport/scissor vectors presumably make
// vk::makeGraphicsPipeline treat those as dynamic state -- confirm.
162 Move<VkPipeline> makeGraphicsPipeline(Context& context,
163 const VkPipelineLayout pipelineLayout,
164 const VkShaderStageFlags stages,
165 const VkShaderModule vertexShaderModule,
166 const VkShaderModule fragmentShaderModule,
167 const VkShaderModule geometryShaderModule,
168 const VkShaderModule tessellationControlModule,
169 const VkShaderModule tessellationEvaluationModule,
170 const VkRenderPass renderPass,
171 const VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
172 const VkVertexInputBindingDescription* vertexInputBindingDescription = DE_NULL,
173 const VkVertexInputAttributeDescription* vertexInputAttributeDescriptions = DE_NULL,
174 const bool frameBufferTests = false,
175 const vk::VkFormat attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT)
177 std::vector<VkViewport> noViewports;
178 std::vector<VkRect2D> noScissors;
// At most one binding and one attribute are ever described, regardless of
// how many entries the caller's arrays might hold.
180 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
182 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
183 DE_NULL, // const void* pNext;
184 0u, // VkPipelineVertexInputStateCreateFlags flags;
185 vertexInputBindingDescription == DE_NULL ? 0u : 1u, // deUint32 vertexBindingDescriptionCount;
186 vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
187 vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u, // deUint32 vertexAttributeDescriptionCount;
188 vertexInputAttributeDescriptions, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
// Enable only as many color channels as the attachment format uses.
191 const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
192 const VkColorComponentFlags colorComponent =
193 numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
194 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
195 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
196 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
// Blending is disabled (leading VK_FALSE); the factors/ops are placeholders.
198 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
200 VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
201 VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
205 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
207 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
208 VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
209 { 0.0f, 0.0f, 0.0f, 0.0f }
// '&' binds tighter than '&&', so this reads (FRAGMENT_BIT & stages) && frameBufferTests.
212 const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
214 return vk::makeGraphicsPipeline(context.getDeviceInterface(), // const DeviceInterface& vk
215 context.getDevice(), // const VkDevice device
216 pipelineLayout, // const VkPipelineLayout pipelineLayout
217 vertexShaderModule, // const VkShaderModule vertexShaderModule
218 tessellationControlModule, // const VkShaderModule tessellationControlShaderModule
219 tessellationEvaluationModule, // const VkShaderModule tessellationEvalShaderModule
220 geometryShaderModule, // const VkShaderModule geometryShaderModule
221 fragmentShaderModule, // const VkShaderModule fragmentShaderModule
222 renderPass, // const VkRenderPass renderPass
223 noViewports, // const std::vector<VkViewport>& viewports
224 noScissors, // const std::vector<VkRect2D>& scissors
225 topology, // const VkPrimitiveTopology topology
226 0u, // const deUint32 subpass
227 patchControlPoints, // const deUint32 patchControlPoints
228 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
229 DE_NULL, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
230 DE_NULL, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
231 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
232 &colorBlendStateCreateInfo); // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
// Creates a compute pipeline for shaderModule (entry point "main").
// localSizeX/Y/Z are passed to the shader as specialization constants with
// IDs 0, 1 and 2 respectively.
// No pipeline cache (DE_NULL) and no base pipeline are used.
235 Move<VkPipeline> makeComputePipeline(Context& context,
236 const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
237 deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
// Backing storage for the three specialization constants.
239 const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
// Each entry is {constantID, byte offset into localSize, size}.
241 const vk::VkSpecializationMapEntry entries[3] =
243 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
244 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
245 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
248 const vk::VkSpecializationInfo info =
250 /* mapEntryCount = */ 3,
251 /* pMapEntries = */ entries,
252 /* dataSize = */ sizeof(localSize),
253 /* pData = */ localSize
256 const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
258 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
259 DE_NULL, // const void* pNext;
260 0u, // VkPipelineShaderStageCreateFlags flags;
261 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
262 shaderModule, // VkShaderModule module;
263 "main", // const char* pName;
264 &info, // const VkSpecializationInfo* pSpecializationInfo;
267 const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
269 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
270 DE_NULL, // const void* pNext;
271 0u, // VkPipelineCreateFlags flags;
272 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
273 pipelineLayout, // VkPipelineLayout layout;
274 DE_NULL, // VkPipeline basePipelineHandle;
275 0, // deInt32 basePipelineIndex;
278 return createComputePipeline(context.getDeviceInterface(),
279 context.getDevice(), DE_NULL, &pipelineCreateInfo);
// Allocates a single descriptor set with layout setLayout from descriptorPool.
282 Move<VkDescriptorSet> makeDescriptorSet(Context& context,
283 const VkDescriptorPool descriptorPool,
284 const VkDescriptorSetLayout setLayout)
286 const VkDescriptorSetAllocateInfo allocateParams =
288 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType
290 DE_NULL, // const void* pNext;
291 descriptorPool, // VkDescriptorPool descriptorPool;
292 1u, // deUint32 setLayoutCount;
293 &setLayout, // const VkDescriptorSetLayout* pSetLayouts;
295 return allocateDescriptorSet(
296 context.getDeviceInterface(), context.getDevice(), &allocateParams);
// Creates a command pool for the context's universal queue family.
// The pool is created with RESET_COMMAND_BUFFER_BIT, so command buffers
// allocated from it may be reset individually.
299 Move<VkCommandPool> makeCommandPool(Context& context)
301 const VkCommandPoolCreateInfo commandPoolParams =
303 VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, // VkStructureType sType;
304 DE_NULL, // const void* pNext;
305 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, // VkCommandPoolCreateFlags
307 context.getUniversalQueueFamilyIndex(), // deUint32 queueFamilyIndex;
310 return createCommandPool(
311 context.getDeviceInterface(), context.getDevice(), &commandPoolParams);
// Allocates one primary-level command buffer from commandPool.
314 Move<VkCommandBuffer> makeCommandBuffer(
315 Context& context, const VkCommandPool commandPool)
317 const VkCommandBufferAllocateInfo bufferAllocateParams =
319 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
320 DE_NULL, // const void* pNext;
321 commandPool, // VkCommandPool commandPool;
322 VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
323 1u, // deUint32 bufferCount;
325 return allocateCommandBuffer(context.getDeviceInterface(),
326 context.getDevice(), &bufferAllocateParams);
// Submits commandBuffer to the context's universal queue and returns a fence
// that is passed to that submission.
// The fence is created unsignaled (flags == 0); the submission uses no wait
// or signal semaphores.
// NOTE(review): how `result` is checked is not visible in this view --
// confirm that a failed queueSubmit is reported.
329 Move<VkFence> submitCommandBuffer(
330 Context& context, const VkCommandBuffer commandBuffer)
332 const VkFenceCreateInfo fenceParams =
334 VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, // VkStructureType sType;
335 DE_NULL, // const void* pNext;
336 0u, // VkFenceCreateFlags flags;
339 Move<VkFence> fence(createFence(
340 context.getDeviceInterface(), context.getDevice(), &fenceParams));
342 const VkSubmitInfo submitInfo =
344 VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType sType;
345 DE_NULL, // const void* pNext;
346 0u, // deUint32 waitSemaphoreCount;
347 DE_NULL, // const VkSemaphore* pWaitSemaphores;
// pWaitDstStageMask: unused because there are no wait semaphores.
348 (const VkPipelineStageFlags*)DE_NULL,
349 1u, // deUint32 commandBufferCount;
350 &commandBuffer, // const VkCommandBuffer* pCommandBuffers;
351 0u, // deUint32 signalSemaphoreCount;
352 DE_NULL, // const VkSemaphore* pSignalSemaphores;
355 vk::VkResult result = (context.getDeviceInterface().queueSubmit(
356 context.getUniversalQueue(), 1u, &submitInfo, *fence));
359 return Move<VkFence>(fence);
// Waits for the given fence with the maximum timeout (~0ull); the wait result
// is verified by VK_CHECK.
// The fence is taken by value as a Move<VkFence>.
362 void waitFence(Context& context, Move<VkFence> fence)
364 VK_CHECK(context.getDeviceInterface().waitForFences(
365 context.getDevice(), 1u, &fence.get(), DE_TRUE, ~0ull));
// Returns this object reinterpreted as a Buffer.
// Calls DE_FATAL when m_isImage is set, i.e. the object was constructed as an image.
378 Buffer* getAsBuffer()
380 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
381 return reinterpret_cast<Buffer* >(this);
// Returns this object reinterpreted as an Image; calls DE_FATAL when
// m_isImage is not set.
386 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
387 return reinterpret_cast<Image*>(this);
// Reports the descriptor type to bind this resource with: either
// STORAGE_IMAGE or STORAGE_BUFFER. Virtual, so derived types may refine it.
// NOTE(review): the condition selecting between the two returns is not
// visible here; presumably it is m_isImage -- confirm.
390 virtual VkDescriptorType getType() const
394 return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
398 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
// Returns a reference to the allocation held in m_allocation.
// m_allocation is dereferenced unconditionally, so it must have been set.
402 Allocation& getAllocation() const
404 return *m_allocation;
// Virtual destructor with an empty body.
407 virtual ~BufferOrImage() {}
// Records whether the object being constructed is an image (true) or a buffer (false).
410 explicit BufferOrImage(bool image) : m_isImage(image) {}
// Owning pointer to the allocation; assigned by the derived constructors.
413 de::details::MovePtr<Allocation> m_allocation;
// A VkBuffer together with its memory allocation.
// The constructor creates the buffer, allocates host-visible memory that
// satisfies the buffer's memory requirements, and binds the two together.
416 struct Buffer : public BufferOrImage
// sizeInBytes: requested buffer size, also reported by getSize().
// usage:       buffer usage flags; defaults to STORAGE_BUFFER.
419 Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
420 : BufferOrImage (false)
421 , m_sizeInBytes (sizeInBytes)
424 const vk::VkBufferCreateInfo bufferCreateInfo =
426 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
431 VK_SHARING_MODE_EXCLUSIVE,
435 m_buffer = createBuffer(context.getDeviceInterface(),
436 context.getDevice(), &bufferCreateInfo);
437 vk::VkMemoryRequirements req = getBufferMemoryRequirements(
438 context.getDeviceInterface(), context.getDevice(), *m_buffer);
// Host-visible memory is requested for the buffer.
440 m_allocation = context.getDefaultAllocator().allocate(
441 req, MemoryRequirement::HostVisible);
442 VK_CHECK(context.getDeviceInterface().bindBufferMemory(
443 context.getDevice(), *m_buffer, m_allocation->getMemory(),
444 m_allocation->getOffset()));
// Descriptor type for this buffer: UNIFORM_BUFFER when m_usage is exactly
// VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, STORAGE_BUFFER otherwise.
// NOTE(review): this is an equality test, not a bit test, so a usage mask
// combining UNIFORM_BUFFER_BIT with other bits is reported as STORAGE_BUFFER.
447 virtual VkDescriptorType getType() const
449 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
451 return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
453 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
// Accessors for the buffer handle, a pointer to that handle, and the size.
456 VkBuffer getBuffer() const {
460 const VkBuffer* getBufferPtr() const {
464 VkDeviceSize getSize() const {
465 return m_sizeInBytes;
469 Move<VkBuffer> m_buffer;
470 VkDeviceSize m_sizeInBytes;
471 const VkBufferUsageFlags m_usage;
// A 2D VkImage together with its memory allocation, an image view and a sampler.
// The constructor creates all three Vulkan objects and binds the image to
// newly allocated memory.
474 struct Image : public BufferOrImage
// width/height: image extent (depth is fixed to 1).
// format:       image format, also used for the view.
// usage:        image usage flags; defaults to STORAGE.
476 explicit Image(Context& context, deUint32 width, deUint32 height,
477 VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
478 : BufferOrImage(true)
// Single mip level, single array layer, one sample, optimal tiling,
// exclusive sharing, initial layout UNDEFINED.
480 const VkImageCreateInfo imageCreateInfo =
482 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
483 format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
484 VK_IMAGE_TILING_OPTIMAL, usage,
485 VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
486 VK_IMAGE_LAYOUT_UNDEFINED
488 m_image = createImage(context.getDeviceInterface(), context.getDevice(),
490 vk::VkMemoryRequirements req = getImageMemoryRequirements(
491 context.getDeviceInterface(), context.getDevice(), *m_image);
// No particular memory property is requested for the image.
494 context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
495 VK_CHECK(context.getDeviceInterface().bindImageMemory(
496 context.getDevice(), *m_image, m_allocation->getMemory(),
497 m_allocation->getOffset()));
499 const VkComponentMapping componentMapping =
501 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
502 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
// 2D color view over mip level 0 and array layer 0, identity swizzle.
505 const VkImageViewCreateInfo imageViewCreateInfo =
507 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
508 VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
510 VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1,
514 m_imageView = createImageView(context.getDeviceInterface(),
515 context.getDevice(), &imageViewCreateInfo);
// Sampler: nearest mipmap mode, clamp-to-edge addressing on all three axes.
517 const struct VkSamplerCreateInfo samplerCreateInfo =
519 VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
524 VK_SAMPLER_MIPMAP_MODE_NEAREST,
525 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
526 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
527 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
532 VK_COMPARE_OP_ALWAYS,
535 VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
539 m_sampler = createSampler(context.getDeviceInterface(), context.getDevice(), &samplerCreateInfo);
// Accessors for the image, image view and sampler handles.
542 VkImage getImage() const {
546 VkImageView getImageView() const {
550 VkSampler getSampler() const {
555 Move<VkImage> m_image;
556 Move<VkImageView> m_imageView;
557 Move<VkSampler> m_sampler;
// Returns GLSL source for a compute-shader helper, sharedMemoryBallot(bool vote),
// backed by a shared uvec4 array with one element per invocation of the workgroup.
// As visible in the snippet: the slot indexed by gl_SubgroupID is zeroed by
// the elected invocation, invocations then atomically OR a bit for their
// gl_SubgroupInvocationID into one of the four components (32 invocations
// per component), and the slot's value is returned.
// The returned text is consumed as shader source; it must not be reformatted.
561 std::string vkt::subgroups::getSharedMemoryBallotHelper()
563 return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
564 "uvec4 sharedMemoryBallot(bool vote)\n"
566 " uint groupOffset = gl_SubgroupID;\n"
567 " // One invocation in the group 0's the whole group's data\n"
568 " if (subgroupElect())\n"
570 " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
572 " subgroupMemoryBarrierShared();\n"
575 " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
576 " const highp uint bitToSet = 1u << invocationId;\n"
577 " switch (gl_SubgroupInvocationID / 32)\n"
579 " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
580 " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
581 " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
582 " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
585 " subgroupMemoryBarrierShared();\n"
586 " return superSecretComputeShaderHelper[groupOffset];\n"
// Returns the subgroupSize reported by the physical device.
// The value is read by chaining VkPhysicalDeviceSubgroupProperties into a
// VkPhysicalDeviceProperties2 query.
590 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
592 VkPhysicalDeviceSubgroupProperties subgroupProperties;
593 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
594 subgroupProperties.pNext = DE_NULL;
596 VkPhysicalDeviceProperties2 properties;
597 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
// Chain the subgroup struct so the query fills it in.
598 properties.pNext = &subgroupProperties;
600 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
602 return subgroupProperties.subgroupSize;
// Returns the largest subgroup size the tests support, as a VkDeviceSize.
// NOTE(review): the returned constant is not visible in this view.
605 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
// Returns a short name string for a single shader stage bit
// (e.g. "tess_control" for the tessellation control stage).
// Stages without a case label raise DE_FATAL("Unhandled stage!").
609 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
614 DE_FATAL("Unhandled stage!");
616 case VK_SHADER_STAGE_COMPUTE_BIT:
618 case VK_SHADER_STAGE_FRAGMENT_BIT:
620 case VK_SHADER_STAGE_VERTEX_BIT:
622 case VK_SHADER_STAGE_GEOMETRY_BIT:
624 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
625 return "tess_control";
626 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
// Returns the enumerator name of a subgroup feature bit as a string.
// Bits without a case label raise DE_FATAL("Unknown subgroup feature category!").
631 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
636 DE_FATAL("Unknown subgroup feature category!");
638 case VK_SUBGROUP_FEATURE_BASIC_BIT:
639 return "VK_SUBGROUP_FEATURE_BASIC_BIT";
640 case VK_SUBGROUP_FEATURE_VOTE_BIT:
641 return "VK_SUBGROUP_FEATURE_VOTE_BIT";
642 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
643 return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
644 case VK_SUBGROUP_FEATURE_BALLOT_BIT:
645 return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
646 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
647 return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
648 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
649 return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
650 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
651 return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
652 case VK_SUBGROUP_FEATURE_QUAD_BIT:
653 return "VK_SUBGROUP_FEATURE_QUAD_BIT";
// Registers three SPIR-V assembly shaders in programCollection.spirvAsmSources
// under the names "vert_noSubgroup", "tesc_noSubgroup" and "tese_noSubgroup"
// (vertex, tessellation control and tessellation evaluation stages).
// NOTE(review): the GLSL fragments that precede each assembly string look
// like the reference source the assembly was generated from -- confirm.
657 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
664 " float pixelSize = 2.0f/1024.0f;\n"
665 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
666 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
667 " gl_PointSize = 1.0f;\n"
// Vertex stage assembly.
670 const std::string vertNoSubgroup =
673 "; Generator: Khronos Glslang Reference Front End; 1\n"
676 "OpCapability Shader\n"
677 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
678 "OpMemoryModel Logical GLSL450\n"
679 "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
680 "OpMemberDecorate %20 0 BuiltIn Position\n"
681 "OpMemberDecorate %20 1 BuiltIn PointSize\n"
682 "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
683 "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
684 "OpDecorate %20 Block\n"
685 "OpDecorate %26 BuiltIn VertexIndex\n"
687 "%3 = OpTypeFunction %2\n"
688 "%6 = OpTypeFloat 32\n"
689 "%7 = OpTypePointer Function %6\n"
690 "%9 = OpConstant %6 0.00195313\n"
691 "%12 = OpConstant %6 2\n"
692 "%14 = OpConstant %6 1\n"
693 "%16 = OpTypeVector %6 4\n"
694 "%17 = OpTypeInt 32 0\n"
695 "%18 = OpConstant %17 1\n"
696 "%19 = OpTypeArray %6 %18\n"
697 "%20 = OpTypeStruct %16 %6 %19 %19\n"
698 "%21 = OpTypePointer Output %20\n"
699 "%22 = OpVariable %21 Output\n"
700 "%23 = OpTypeInt 32 1\n"
701 "%24 = OpConstant %23 0\n"
702 "%25 = OpTypePointer Input %23\n"
703 "%26 = OpVariable %25 Input\n"
704 "%33 = OpConstant %6 0\n"
705 "%35 = OpTypePointer Output %16\n"
706 "%37 = OpConstant %23 1\n"
707 "%38 = OpTypePointer Output %6\n"
708 "%4 = OpFunction %2 None %3\n"
710 "%8 = OpVariable %7 Function\n"
711 "%10 = OpVariable %7 Function\n"
713 "%11 = OpLoad %6 %8\n"
714 "%13 = OpFDiv %6 %11 %12\n"
715 "%15 = OpFSub %6 %13 %14\n"
717 "%27 = OpLoad %23 %26\n"
718 "%28 = OpConvertSToF %6 %27\n"
719 "%29 = OpLoad %6 %8\n"
720 "%30 = OpFMul %6 %28 %29\n"
721 "%31 = OpLoad %6 %10\n"
722 "%32 = OpFAdd %6 %30 %31\n"
723 "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
724 "%36 = OpAccessChain %35 %22 %24\n"
726 "%39 = OpAccessChain %38 %22 %37\n"
730 programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
736 "layout(vertices=1) out;\n"
740 " if (gl_InvocationID == 0)\n"
742 " gl_TessLevelOuter[0] = 1.0f;\n"
743 " gl_TessLevelOuter[1] = 1.0f;\n"
745 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
// Tessellation control stage assembly.
748 const std::string tescNoSubgroup =
751 "; Generator: Khronos Glslang Reference Front End; 1\n"
754 "OpCapability Tessellation\n"
755 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
756 "OpMemoryModel Logical GLSL450\n"
757 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
758 "OpExecutionMode %4 OutputVertices 1\n"
759 "OpDecorate %8 BuiltIn InvocationId\n"
760 "OpDecorate %20 Patch\n"
761 "OpDecorate %20 BuiltIn TessLevelOuter\n"
762 "OpMemberDecorate %29 0 BuiltIn Position\n"
763 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
764 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
765 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
766 "OpDecorate %29 Block\n"
767 "OpMemberDecorate %34 0 BuiltIn Position\n"
768 "OpMemberDecorate %34 1 BuiltIn PointSize\n"
769 "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
770 "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
771 "OpDecorate %34 Block\n"
773 "%3 = OpTypeFunction %2\n"
774 "%6 = OpTypeInt 32 1\n"
775 "%7 = OpTypePointer Input %6\n"
776 "%8 = OpVariable %7 Input\n"
777 "%10 = OpConstant %6 0\n"
779 "%15 = OpTypeFloat 32\n"
780 "%16 = OpTypeInt 32 0\n"
781 "%17 = OpConstant %16 4\n"
782 "%18 = OpTypeArray %15 %17\n"
783 "%19 = OpTypePointer Output %18\n"
784 "%20 = OpVariable %19 Output\n"
785 "%21 = OpConstant %15 1\n"
786 "%22 = OpTypePointer Output %15\n"
787 "%24 = OpConstant %6 1\n"
788 "%26 = OpTypeVector %15 4\n"
789 "%27 = OpConstant %16 1\n"
790 "%28 = OpTypeArray %15 %27\n"
791 "%29 = OpTypeStruct %26 %15 %28 %28\n"
792 "%30 = OpTypeArray %29 %27\n"
793 "%31 = OpTypePointer Output %30\n"
794 "%32 = OpVariable %31 Output\n"
795 "%34 = OpTypeStruct %26 %15 %28 %28\n"
796 "%35 = OpConstant %16 32\n"
797 "%36 = OpTypeArray %34 %35\n"
798 "%37 = OpTypePointer Input %36\n"
799 "%38 = OpVariable %37 Input\n"
800 "%40 = OpTypePointer Input %26\n"
801 "%43 = OpTypePointer Output %26\n"
802 "%4 = OpFunction %2 None %3\n"
804 "%9 = OpLoad %6 %8\n"
805 "%12 = OpIEqual %11 %9 %10\n"
806 "OpSelectionMerge %14 None\n"
807 "OpBranchConditional %12 %13 %14\n"
809 "%23 = OpAccessChain %22 %20 %10\n"
811 "%25 = OpAccessChain %22 %20 %24\n"
815 "%33 = OpLoad %6 %8\n"
816 "%39 = OpLoad %6 %8\n"
817 "%41 = OpAccessChain %40 %38 %39 %10\n"
818 "%42 = OpLoad %26 %41\n"
819 "%44 = OpAccessChain %43 %32 %33 %10\n"
823 programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
829 "layout(isolines) in;\n"
833 " float pixelSize = 2.0f/1024.0f;\n"
834 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
// Tessellation evaluation stage assembly.
837 const std::string teseNoSubgroup =
840 "; Generator: Khronos Glslang Reference Front End; 2\n"
843 "OpCapability Tessellation\n"
844 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
845 "OpMemoryModel Logical GLSL450\n"
846 "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
847 "OpExecutionMode %4 Isolines\n"
848 "OpExecutionMode %4 SpacingEqual\n"
849 "OpExecutionMode %4 VertexOrderCcw\n"
850 "OpMemberDecorate %14 0 BuiltIn Position\n"
851 "OpMemberDecorate %14 1 BuiltIn PointSize\n"
852 "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
853 "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
854 "OpDecorate %14 Block\n"
855 "OpMemberDecorate %19 0 BuiltIn Position\n"
856 "OpMemberDecorate %19 1 BuiltIn PointSize\n"
857 "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
858 "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
859 "OpDecorate %19 Block\n"
860 "OpDecorate %29 BuiltIn TessCoord\n"
862 "%3 = OpTypeFunction %2\n"
863 "%6 = OpTypeFloat 32\n"
864 "%7 = OpTypePointer Function %6\n"
865 "%9 = OpConstant %6 0.00195313\n"
866 "%10 = OpTypeVector %6 4\n"
867 "%11 = OpTypeInt 32 0\n"
868 "%12 = OpConstant %11 1\n"
869 "%13 = OpTypeArray %6 %12\n"
870 "%14 = OpTypeStruct %10 %6 %13 %13\n"
871 "%15 = OpTypePointer Output %14\n"
872 "%16 = OpVariable %15 Output\n"
873 "%17 = OpTypeInt 32 1\n"
874 "%18 = OpConstant %17 0\n"
875 "%19 = OpTypeStruct %10 %6 %13 %13\n"
876 "%20 = OpConstant %11 32\n"
877 "%21 = OpTypeArray %19 %20\n"
878 "%22 = OpTypePointer Input %21\n"
879 "%23 = OpVariable %22 Input\n"
880 "%24 = OpTypePointer Input %10\n"
881 "%27 = OpTypeVector %6 3\n"
882 "%28 = OpTypePointer Input %27\n"
883 "%29 = OpVariable %28 Input\n"
884 "%30 = OpConstant %11 0\n"
885 "%31 = OpTypePointer Input %6\n"
886 "%36 = OpConstant %6 2\n"
887 "%40 = OpTypePointer Output %10\n"
888 "%4 = OpFunction %2 None %3\n"
890 "%8 = OpVariable %7 Function\n"
892 "%25 = OpAccessChain %24 %23 %18 %18\n"
893 "%26 = OpLoad %10 %25\n"
894 "%32 = OpAccessChain %31 %29 %30\n"
895 "%33 = OpLoad %6 %32\n"
896 "%34 = OpLoad %6 %8\n"
897 "%35 = OpFMul %6 %33 %34\n"
898 "%37 = OpFDiv %6 %35 %36\n"
899 "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
900 "%39 = OpFAdd %10 %26 %38\n"
901 "%41 = OpAccessChain %40 %16 %18\n"
905 programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
// Returns vertex shader source text chosen by the stage under test.
// Stages without a case label raise DE_FATAL("Unhandled stage!").
// The two tessellation stages share one case body.
911 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
916 DE_FATAL("Unhandled stage!");
918 case VK_SHADER_STAGE_FRAGMENT_BIT:
923 " float pixelSize = 2.0f/1024.0f;\n"
924 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
925 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
927 case VK_SHADER_STAGE_GEOMETRY_BIT:
933 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
934 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
// Returns whether the context supports API version 1.1.0, which this file
// uses as the criterion for subgroup support.
943 bool vkt::subgroups::isSubgroupSupported(Context& context)
945 return context.contextSupports(vk::ApiVersion(1, 1, 0));
// Returns true when any bit of `stage` is present in the device's
// VkPhysicalDeviceSubgroupProperties::supportedStages mask.
948 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
949 Context& context, const VkShaderStageFlags stage)
951 VkPhysicalDeviceSubgroupProperties subgroupProperties;
952 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
953 subgroupProperties.pNext = DE_NULL;
955 VkPhysicalDeviceProperties2 properties;
956 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
// Chain the subgroup struct so the query fills it in.
957 properties.pNext = &subgroupProperties;
959 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
961 return (stage & subgroupProperties.supportedStages) ? true : false;
// Reports whether subgroup operations are mandatory for the given stage.
// Only the compute stage has a dedicated case label.
// NOTE(review): the returned values are not visible in this view.
964 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
965 VkShaderStageFlags stage)
971 case VK_SHADER_STAGE_COMPUTE_BIT:
// Returns true when `bit` is present in the device's
// VkPhysicalDeviceSubgroupProperties::supportedOperations mask.
976 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
978 VkSubgroupFeatureFlagBits bit) {
979 VkPhysicalDeviceSubgroupProperties subgroupProperties;
980 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
981 subgroupProperties.pNext = DE_NULL;
983 VkPhysicalDeviceProperties2 properties;
984 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
// Chain the subgroup struct so the query fills it in.
985 properties.pNext = &subgroupProperties;
987 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
989 return (bit & subgroupProperties.supportedOperations) ? true : false;
// Returns whether the physical device reports the fragmentStoresAndAtomics feature.
992 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
994 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
995 context.getInstanceInterface(), context.getPhysicalDevice());
996 return features.fragmentStoresAndAtomics ? true : false;
// Returns whether the physical device reports the vertexPipelineStoresAndAtomics feature.
999 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
1001 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1002 context.getInstanceInterface(), context.getPhysicalDevice());
1003 return features.vertexPipelineStoresAndAtomics ? true : false;
// Returns whether the physical device reports the shaderFloat64 feature.
1006 bool vkt::subgroups::isDoubleSupportedForDevice(Context& context)
1008 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1009 context.getInstanceInterface(), context.getPhysicalDevice());
1010 return features.shaderFloat64 ? true : false;
// Classifies a format by whether it is one of the four 64-bit float formats
// (R64, R64G64, R64G64B64, R64G64B64A64), which share one case body.
// NOTE(review): the returned values are not visible in this view; presumably
// true for these cases and false otherwise -- confirm.
1013 bool vkt::subgroups::isDoubleFormat(VkFormat format)
1019 case VK_FORMAT_R64_SFLOAT:
1020 case VK_FORMAT_R64G64_SFLOAT:
1021 case VK_FORMAT_R64G64B64_SFLOAT:
1022 case VK_FORMAT_R64G64B64A64_SFLOAT:
// Returns the GLSL type name corresponding to a VkFormat, with one case per
// supported format. Formats without a case label raise
// DE_FATAL("Unhandled format!").
// NOTE(review): the returned names are not visible in this view. The
// *_USCALED cases presumably map to bool/bvec* types -- confirm.
1027 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1032 DE_FATAL("Unhandled format!");
1034 case VK_FORMAT_R32_SINT:
1036 case VK_FORMAT_R32G32_SINT:
1038 case VK_FORMAT_R32G32B32_SINT:
1040 case VK_FORMAT_R32G32B32A32_SINT:
1042 case VK_FORMAT_R32_UINT:
1044 case VK_FORMAT_R32G32_UINT:
1046 case VK_FORMAT_R32G32B32_UINT:
1048 case VK_FORMAT_R32G32B32A32_UINT:
1050 case VK_FORMAT_R32_SFLOAT:
1052 case VK_FORMAT_R32G32_SFLOAT:
1054 case VK_FORMAT_R32G32B32_SFLOAT:
1056 case VK_FORMAT_R32G32B32A32_SFLOAT:
1058 case VK_FORMAT_R64_SFLOAT:
1060 case VK_FORMAT_R64G64_SFLOAT:
1062 case VK_FORMAT_R64G64B64_SFLOAT:
1064 case VK_FORMAT_R64G64B64A64_SFLOAT:
1066 case VK_FORMAT_R8_USCALED:
1068 case VK_FORMAT_R8G8_USCALED:
1070 case VK_FORMAT_R8G8B8_USCALED:
1072 case VK_FORMAT_R8G8B8A8_USCALED:
// Registers a SPIR-V assembly vertex shader under the name "vert" in
// programCollection.spirvAsmSources.
// The assembly declares a vec4 input at location 0 and a gl_PerVertex-style
// output block.
// NOTE(review): the GLSL fragment at the top looks like the reference source
// the assembly was generated from -- confirm.
1077 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
1080 "layout(location = 0) in highp vec4 in_position;\n"
1081 "void main (void)\n"
1083 " gl_Position = in_position;\n"
1086 programCollection.spirvAsmSources.add("vert") <<
1089 "; Generator: Khronos Glslang Reference Front End; 2\n"
1092 "OpCapability Shader\n"
1093 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1094 "OpMemoryModel Logical GLSL450\n"
1095 "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
1096 "OpMemberDecorate %11 0 BuiltIn Position\n"
1097 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1098 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1099 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1100 "OpDecorate %11 Block\n"
1101 "OpDecorate %17 Location 0\n"
1103 "%3 = OpTypeFunction %2\n"
1104 "%6 = OpTypeFloat 32\n"
1105 "%7 = OpTypeVector %6 4\n"
1106 "%8 = OpTypeInt 32 0\n"
1107 "%9 = OpConstant %8 1\n"
1108 "%10 = OpTypeArray %6 %9\n"
1109 "%11 = OpTypeStruct %7 %6 %10 %10\n"
1110 "%12 = OpTypePointer Output %11\n"
1111 "%13 = OpVariable %12 Output\n"
1112 "%14 = OpTypeInt 32 1\n"
1113 "%15 = OpConstant %14 0\n"
1114 "%16 = OpTypePointer Input %7\n"
1115 "%17 = OpVariable %16 Input\n"
1116 "%19 = OpTypePointer Output %7\n"
1117 "%4 = OpFunction %2 None %3\n"
1119 "%18 = OpLoad %7 %17\n"
1120 "%20 = OpAccessChain %19 %13 %15\n"
// Registers a SPIR-V assembly fragment shader under the name "fragment" in
// programCollection.spirvAsmSources.
//
// The GLSL lines that precede the add("fragment") call describe the shader:
// a float input at location 0 is converted to uint and written to the uint
// output at location 0.
// NOTE(review): how those GLSL strings are used is not visible in this excerpt;
// presumably they are kept as the reference source for the assembly - confirm.
//
// In the assembly, %11 is the float input (Location 0) and %8 the uint output
// (Location 0); the loaded input is converted with OpConvertFToU. The store and
// function epilogue are not visible here.
1126 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
1129 "layout(location = 0) in float in_color;\n"
1130 "layout(location = 0) out uint out_color;\n"
1133 " out_color = uint(in_color);\n"
1136 programCollection.spirvAsmSources.add("fragment") <<
1139 "; Generator: Khronos Glslang Reference Front End; 2\n"
1142 "OpCapability Shader\n"
1143 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1144 "OpMemoryModel Logical GLSL450\n"
1145 "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
1146 "OpExecutionMode %4 OriginUpperLeft\n"
1147 "OpDecorate %8 Location 0\n"
1148 "OpDecorate %11 Location 0\n"
1150 "%3 = OpTypeFunction %2\n"
1151 "%6 = OpTypeInt 32 0\n"
1152 "%7 = OpTypePointer Output %6\n"
1153 "%8 = OpVariable %7 Output\n"
1154 "%9 = OpTypeFloat 32\n"
1155 "%10 = OpTypePointer Input %9\n"
1156 "%11 = OpVariable %10 Input\n"
1157 "%4 = OpFunction %2 None %3\n"
1159 "%12 = OpLoad %9 %11\n"
1160 "%13 = OpConvertFToU %6 %12\n"
// Registers a SPIR-V assembly tessellation control shader under the name "tesc"
// in programCollection.spirvAsmSources.
//
// The GLSL lines that precede the add("tesc") call describe the shader: two
// output vertices per patch, invocation 0 writes 1.0 to gl_TessLevelOuter[0]
// and [1], and every invocation copies its input gl_Position to its output.
// NOTE(review): how those GLSL strings are used is not visible in this excerpt;
// presumably they are kept as the reference source for the assembly - confirm.
//
// In the assembly: %8 is the InvocationId built-in, %20 the Patch-decorated
// TessLevelOuter array, %33 the per-vertex output block array (2 elements) and
// %39 the per-vertex input block array (32 elements). Stores, labels and the
// function epilogue are not visible here.
1166 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
1169 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1170 "#extension GL_EXT_tessellation_shader : require\n"
1171 "layout(vertices = 2) out;\n"
1172 "void main (void)\n"
1174 " if (gl_InvocationID == 0)\n"
1176 " gl_TessLevelOuter[0] = 1.0f;\n"
1177 " gl_TessLevelOuter[1] = 1.0f;\n"
1179 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1182 programCollection.spirvAsmSources.add("tesc") <<
1185 "; Generator: Khronos Glslang Reference Front End; 2\n"
1188 "OpCapability Tessellation\n"
1189 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1190 "OpMemoryModel Logical GLSL450\n"
1191 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
1192 "OpExecutionMode %4 OutputVertices 2\n"
1193 "OpDecorate %8 BuiltIn InvocationId\n"
1194 "OpDecorate %20 Patch\n"
1195 "OpDecorate %20 BuiltIn TessLevelOuter\n"
1196 "OpMemberDecorate %29 0 BuiltIn Position\n"
1197 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1198 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1199 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1200 "OpDecorate %29 Block\n"
1201 "OpMemberDecorate %35 0 BuiltIn Position\n"
1202 "OpMemberDecorate %35 1 BuiltIn PointSize\n"
1203 "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
1204 "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
1205 "OpDecorate %35 Block\n"
1207 "%3 = OpTypeFunction %2\n"
1208 "%6 = OpTypeInt 32 1\n"
1209 "%7 = OpTypePointer Input %6\n"
1210 "%8 = OpVariable %7 Input\n"
1211 "%10 = OpConstant %6 0\n"
1212 "%11 = OpTypeBool\n"
1213 "%15 = OpTypeFloat 32\n"
1214 "%16 = OpTypeInt 32 0\n"
1215 "%17 = OpConstant %16 4\n"
1216 "%18 = OpTypeArray %15 %17\n"
1217 "%19 = OpTypePointer Output %18\n"
1218 "%20 = OpVariable %19 Output\n"
1219 "%21 = OpConstant %15 1\n"
1220 "%22 = OpTypePointer Output %15\n"
1221 "%24 = OpConstant %6 1\n"
1222 "%26 = OpTypeVector %15 4\n"
1223 "%27 = OpConstant %16 1\n"
1224 "%28 = OpTypeArray %15 %27\n"
1225 "%29 = OpTypeStruct %26 %15 %28 %28\n"
1226 "%30 = OpConstant %16 2\n"
1227 "%31 = OpTypeArray %29 %30\n"
1228 "%32 = OpTypePointer Output %31\n"
1229 "%33 = OpVariable %32 Output\n"
1230 "%35 = OpTypeStruct %26 %15 %28 %28\n"
1231 "%36 = OpConstant %16 32\n"
1232 "%37 = OpTypeArray %35 %36\n"
1233 "%38 = OpTypePointer Input %37\n"
1234 "%39 = OpVariable %38 Input\n"
1235 "%41 = OpTypePointer Input %26\n"
1236 "%44 = OpTypePointer Output %26\n"
1237 "%4 = OpFunction %2 None %3\n"
1239 "%9 = OpLoad %6 %8\n"
1240 "%12 = OpIEqual %11 %9 %10\n"
1241 "OpSelectionMerge %14 None\n"
1242 "OpBranchConditional %12 %13 %14\n"
1244 "%23 = OpAccessChain %22 %20 %10\n"
1246 "%25 = OpAccessChain %22 %20 %24\n"
1250 "%34 = OpLoad %6 %8\n"
1251 "%40 = OpLoad %6 %8\n"
1252 "%42 = OpAccessChain %41 %39 %40 %10\n"
1253 "%43 = OpLoad %26 %42\n"
1254 "%45 = OpAccessChain %44 %33 %34 %10\n"
// Registers a SPIR-V assembly tessellation evaluation shader under the name
// "tese" in programCollection.spirvAsmSources.
//
// The GLSL lines that precede the add("tese") call describe the shader:
// isolines with equal spacing and ccw ordering; gl_Position is the mix of the
// two input positions by gl_TessCoord.x, and out_color forwards in_color[0].
// NOTE(review): how those GLSL strings are used is not visible in this excerpt;
// presumably they are kept as the reference source for the assembly - confirm.
//
// In the assembly: %13 is the output block (member 0 = Position), %20 the input
// block array (32 elements), %29 the TessCoord built-in, %39 the float output
// at Location 0 and %42 the float input array at Location 0. Stores and the
// function epilogue are not visible here.
1260 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
1263 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1264 "#extension GL_EXT_tessellation_shader : require\n"
1265 "layout(isolines, equal_spacing, ccw ) in;\n"
1266 "layout(location = 0) in float in_color[];\n"
1267 "layout(location = 0) out float out_color;\n"
1269 "void main (void)\n"
1271 " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1272 " out_color = in_color[0];\n"
1275 programCollection.spirvAsmSources.add("tese") <<
1278 "; Generator: Khronos Glslang Reference Front End; 2\n"
1281 "OpCapability Tessellation\n"
1282 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1283 "OpMemoryModel Logical GLSL450\n"
1284 "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
1285 "OpExecutionMode %4 Isolines\n"
1286 "OpExecutionMode %4 SpacingEqual\n"
1287 "OpExecutionMode %4 VertexOrderCcw\n"
1288 "OpMemberDecorate %11 0 BuiltIn Position\n"
1289 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1290 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1291 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1292 "OpDecorate %11 Block\n"
1293 "OpMemberDecorate %16 0 BuiltIn Position\n"
1294 "OpMemberDecorate %16 1 BuiltIn PointSize\n"
1295 "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
1296 "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
1297 "OpDecorate %16 Block\n"
1298 "OpDecorate %29 BuiltIn TessCoord\n"
1299 "OpDecorate %39 Location 0\n"
1300 "OpDecorate %42 Location 0\n"
1302 "%3 = OpTypeFunction %2\n"
1303 "%6 = OpTypeFloat 32\n"
1304 "%7 = OpTypeVector %6 4\n"
1305 "%8 = OpTypeInt 32 0\n"
1306 "%9 = OpConstant %8 1\n"
1307 "%10 = OpTypeArray %6 %9\n"
1308 "%11 = OpTypeStruct %7 %6 %10 %10\n"
1309 "%12 = OpTypePointer Output %11\n"
1310 "%13 = OpVariable %12 Output\n"
1311 "%14 = OpTypeInt 32 1\n"
1312 "%15 = OpConstant %14 0\n"
1313 "%16 = OpTypeStruct %7 %6 %10 %10\n"
1314 "%17 = OpConstant %8 32\n"
1315 "%18 = OpTypeArray %16 %17\n"
1316 "%19 = OpTypePointer Input %18\n"
1317 "%20 = OpVariable %19 Input\n"
1318 "%21 = OpTypePointer Input %7\n"
1319 "%24 = OpConstant %14 1\n"
1320 "%27 = OpTypeVector %6 3\n"
1321 "%28 = OpTypePointer Input %27\n"
1322 "%29 = OpVariable %28 Input\n"
1323 "%30 = OpConstant %8 0\n"
1324 "%31 = OpTypePointer Input %6\n"
1325 "%36 = OpTypePointer Output %7\n"
1326 "%38 = OpTypePointer Output %6\n"
1327 "%39 = OpVariable %38 Output\n"
1328 "%40 = OpTypeArray %6 %17\n"
1329 "%41 = OpTypePointer Input %40\n"
1330 "%42 = OpVariable %41 Input\n"
1331 "%4 = OpFunction %2 None %3\n"
1333 "%22 = OpAccessChain %21 %20 %15 %15\n"
1334 "%23 = OpLoad %7 %22\n"
1335 "%25 = OpAccessChain %21 %20 %24 %15\n"
1336 "%26 = OpLoad %7 %25\n"
1337 "%32 = OpAccessChain %31 %29 %30\n"
1338 "%33 = OpLoad %6 %32\n"
1339 "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
1340 "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
1341 "%37 = OpAccessChain %36 %13 %15\n"
1343 "%43 = OpAccessChain %31 %42 %15\n"
1344 "%44 = OpLoad %6 %43\n"
// GLSL overload: instantiates a geometry shader template twice and adds both
// variants to `collection`.
//
// glslTemplate - template text containing a TOPOLOGY parameter.
// options      - build options streamed into each added program.
// collection   - receives "geometry_lines" (TOPOLOGY = "lines") and
//                "geometry_points" (TOPOLOGY = "points").
1350 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection)
1352 tcu::StringTemplate geometryTemplate(glslTemplate);
// One substitution map per input primitive type.
1354 map<string, string> linesParams;
1355 linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
1357 map<string, string> pointsParams;
1358 pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
1360 collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
1361 collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
// SPIR-V assembly overload: instantiates a geometry shader template twice and
// adds both variants to `collection`.
//
// spirvTemplate - template text containing a TOPOLOGY parameter.
// options       - build options streamed into each added program.
// collection    - receives "geometry_lines" (TOPOLOGY = "InputLines") and
//                 "geometry_points" (TOPOLOGY = "InputPoints").
1364 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
1366 tcu::StringTemplate geometryTemplate(spirvTemplate);
// One substitution map per input primitive type.
1368 map<string, string> linesParams;
1369 linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
1371 map<string, string> pointsParams;
1372 pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
1374 collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
1375 collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
// Fills the host-visible memory of `alloc` according to data.initializeType and
// flushes it to the device.
//
// context - supplies the command-line base seed and the device used for flushing.
// alloc   - allocation whose host pointer is written.
// data    - describes the element format, element count and initialization mode.
//
// The byte size written is getFormatSizeInBytes(data.format) * data.numElements.
//  - InitializeNonZero: values come from a de::Random seeded with the command-line
//    base seed, written as deUint32, float or double depending on the format family.
//  - InitializeZero: the memory is walked as 32-bit words.
//    NOTE(review): the loop body is not visible in this excerpt; presumably it
//    stores 0 - confirm.
//  - InitializeNone: nothing is written and no flush is issued.
1378 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
1380 const vk::VkFormat format = data.format;
1381 const vk::VkDeviceSize size = getFormatSizeInBytes(format) * data.numElements;
1382 if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
// Seeding from the base seed keeps the generated data reproducible for a given command line.
1384 de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
1389 DE_FATAL("Illegal buffer format");
// Boolean stand-in formats and 32-bit integer formats are all filled as raw 32-bit words.
1391 case VK_FORMAT_R8_USCALED:
1392 case VK_FORMAT_R8G8_USCALED:
1393 case VK_FORMAT_R8G8B8_USCALED:
1394 case VK_FORMAT_R8G8B8A8_USCALED:
1395 case VK_FORMAT_R32_SINT:
1396 case VK_FORMAT_R32G32_SINT:
1397 case VK_FORMAT_R32G32B32_SINT:
1398 case VK_FORMAT_R32G32B32A32_SINT:
1399 case VK_FORMAT_R32_UINT:
1400 case VK_FORMAT_R32G32_UINT:
1401 case VK_FORMAT_R32G32B32_UINT:
1402 case VK_FORMAT_R32G32B32A32_UINT:
1404 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
1406 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
1408 ptr[k] = rnd.getUint32();
// 32-bit float formats are filled with rnd.getFloat().
1412 case VK_FORMAT_R32_SFLOAT:
1413 case VK_FORMAT_R32G32_SFLOAT:
1414 case VK_FORMAT_R32G32B32_SFLOAT:
1415 case VK_FORMAT_R32G32B32A32_SFLOAT:
1417 float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
1419 for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
1421 ptr[k] = rnd.getFloat();
// 64-bit float formats are filled with rnd.getDouble().
1425 case VK_FORMAT_R64_SFLOAT:
1426 case VK_FORMAT_R64G64_SFLOAT:
1427 case VK_FORMAT_R64G64B64_SFLOAT:
1428 case VK_FORMAT_R64G64B64A64_SFLOAT:
1430 double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
1432 for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
1434 ptr[k] = rnd.getDouble();
1440 else if (subgroups::SSBOData::InitializeZero == data.initializeType)
1442 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
// Iterates in 4-byte words regardless of format.
1444 for (vk::VkDeviceSize k = 0; k < size / 4; k++)
// Only flush when the host actually wrote something.
1450 if (subgroups::SSBOData::InitializeNone != data.initializeType)
1452 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
// Maps a single shader stage bit to a descriptor binding number.
//
// allStages() passes the returned value as the binding index when it adds the
// per-stage result buffer to the descriptor set layout.
// NOTE(review): the switch header, the value returned for each case and the
// fall-through/default handling are not visible in this excerpt - confirm
// against the full file.
1456 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
1460 case VK_SHADER_STAGE_VERTEX_BIT:
1463 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1466 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1469 case VK_SHADER_STAGE_GEOMETRY_BIT:
// Runs a framebuffer-based subgroup test through a vertex + tessellation control +
// tessellation evaluation + fragment pipeline and validates the rendered output.
//
// context        - test context providing device, binaries ("vert", "tesc", "tese",
//                  "fragment") and logging.
// format         - format of the colour attachment / result image.
// extraData      - array of extraDataCount input descriptions (buffer or image).
// extraDataCount - number of entries in extraData.
// checkResult    - callback that validates one iteration's read-back data.
// shaderStage    - stage flags with which the extra inputs are bound in the layout.
//
// Returns TestStatus::pass("OK") when no iteration failed, otherwise logs how many
// iterations passed and returns TestStatus::fail("Failed!").
1480 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
1481 Context& context, VkFormat format, SSBOData* extraData,
1482 deUint32 extraDataCount,
1483 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
1484 const VkShaderStageFlags shaderStage)
// Upper bound on the render width; also the width of the attachment image.
1486 const deUint32 maxWidth = 1024u;
1487 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
1488 DescriptorSetLayoutBuilder layoutBuilder;
1489 DescriptorPoolBuilder poolBuilder;
1490 DescriptorSetUpdateBuilder updateBuilder;
1491 Move <VkDescriptorPool> descriptorPool;
1492 Move <VkDescriptorSet> descriptorSet;
// Shader modules are created from the pre-built binaries registered under fixed names.
1494 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1495 context.getBinaryCollection().get("vert"), 0u));
1496 const Unique<VkShaderModule> teCtrlShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1497 context.getBinaryCollection().get("tesc"), 0u));
1498 const Unique<VkShaderModule> teEvalShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1499 context.getBinaryCollection().get("tese"), 0u));
1500 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1501 context.getBinaryCollection().get("fragment"), 0u));
1502 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
// Vertex input: one tightly packed vec4 per vertex.
1504 const VkVertexInputBindingDescription vertexInputBinding =
1507 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
1508 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
1511 const VkVertexInputAttributeDescription vertexInputAttribute =
1515 VK_FORMAT_R32G32B32A32_SFLOAT,
// Create and initialise one image or uniform buffer per extra input.
1519 for (deUint32 i = 0u; i < extraDataCount; i++)
1521 if (extraData[i].isImage)
1523 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
1527 vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
1528 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
1530 const Allocation& alloc = inputBuffers[i]->getAllocation();
1531 initializeMemory(context, alloc, extraData[i]);
// Bindings are added in input order, all visible to the requested shaderStage.
1534 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1535 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
1537 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
1539 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(context, *descriptorSetLayout));
// Patch-list topology is used because tessellation stages are present.
1541 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
1542 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
1543 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
1544 *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
1545 *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
1547 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1548 poolBuilder.addType(inputBuffers[ndx]->getType());
// A descriptor pool and set are only created when there is at least one extra input.
1550 if (extraDataCount > 0)
1552 descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
1553 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1554 descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
// Each input is written to the binding whose number equals its index.
1557 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
1559 if (inputBuffers[buffersNdx]->isImage())
1561 VkDescriptorImageInfo info =
1562 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
1563 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
1565 updateBuilder.writeSingle(*descriptorSet,
1566 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1567 inputBuffers[buffersNdx]->getType(), &info);
1571 VkDescriptorBufferInfo info =
1572 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
1573 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
1575 updateBuilder.writeSingle(*descriptorSet,
1576 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1577 inputBuffers[buffersNdx]->getType(), &info);
1581 updateBuilder.update(context.getDeviceInterface(), context.getDevice());
1583 const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
1584 const deUint32 subgroupSize = getSubgroupSize(context);
1585 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
// Two vertices per pixel column: the end points of a one-pixel-wide segment.
1586 const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
1587 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
1588 unsigned totalIterations = 0u;
1589 unsigned failedIterations = 0u;
1590 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1593 const Allocation& alloc = vertexBuffer.getAllocation();
1594 std::vector<tcu::Vec4> data (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
// Width of one pixel in normalized device coordinates (the [-1, 1] range split into maxWidth).
1595 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
1596 float leftHandPosition = -1.0f;
// Vertex pair (ndx, ndx+1) gets the left and right x coordinate of consecutive pixels.
1598 for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
1600 data[ndx][0] = leftHandPosition;
1601 leftHandPosition += pixelSize;
1602 data[ndx+1][0] = leftHandPosition;
1605 deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
1606 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
// One draw + read-back + check per width in [1, maxWidth).
// NOTE(review): the framebuffer, viewport and scissor below do not depend on
// `width` yet are rebuilt every iteration - candidates for hoisting.
1609 for (deUint32 width = 1u; width < maxWidth; ++width)
1611 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
1612 const VkViewport viewport = makeViewport(maxWidth, 1u);
1613 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
1614 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
1615 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1616 const VkDeviceSize vertexBufferOffset = 0u;
1620 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1623 context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
1624 context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
1626 beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
1628 context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
// descriptorSet is only valid when extra inputs exist (see creation above).
1630 if (extraDataCount > 0)
1632 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
1633 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
1634 &descriptorSet.get(), 0u, DE_NULL);
1637 context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
// Draws the first 2 * width vertices of the vertex buffer.
1638 context.getDeviceInterface().cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
1640 endRenderPass(context.getDeviceInterface(), *cmdBuffer);
// Read the whole attachment row back into a host-visible buffer.
1642 copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
1643 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1645 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
1646 waitFence(context, fence);
1650 const Allocation& allocResult = imageBufferResult.getAllocation();
1651 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
1653 std::vector<const void*> datas;
1654 datas.push_back(allocResult.getHostPtr());
// NOTE(review): 2 * width vertices are drawn but only width/2 results are
// validated; the reason is not visible in this excerpt - confirm it matches
// the pipeline's patch size and the tessellation output.
1655 if (!checkResult(datas, width/2u, subgroupSize))
// Any failed iteration fails the test; the log reports how many iterations passed.
1660 if (0 < failedIterations)
1662 context.getTestContext().getLog()
1663 << TestLog::Message << (totalIterations - failedIterations) << " / "
1664 << totalIterations << " values passed" << TestLog::EndMessage;
1665 return tcu::TestStatus::fail("Failed!");
1668 return tcu::TestStatus::pass("OK");
// Validates the first result buffer as an array of `width` 32-bit unsigned values.
//
// datas - result pointers; only datas[0] is read by the visible code.
// width - number of deUint32 elements to inspect.
// ref   - reference value.
// NOTE(review): the loop body and return statements are not visible in this
// excerpt; presumably each element is compared against `ref` and the function
// returns false on the first mismatch - confirm.
1671 bool vkt::subgroups::check(std::vector<const void*> datas,
1672 deUint32 width, deUint32 ref)
1674 const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
1676 for (deUint32 n = 0; n < width; ++n)
// Compute-dispatch variant of check(): validates one result per global invocation.
//
// datas         - result pointers forwarded to check().
// numWorkgroups - workgroup counts in x, y, z.
// localSize     - workgroup sizes in x, y, z.
// The element count passed to check() is the product of the three global sizes
// (numWorkgroups[i] * localSize[i]).
// NOTE(review): the remainder of the parameter list (which declares `ref`) is
// not visible in this excerpt.
1687 bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
1688 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
1691 const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
1692 const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
1693 const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
1695 return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
// Runs a framebuffer-based subgroup test through a vertex + geometry + fragment
// pipeline and validates the rendered output.
//
// context        - test context providing device, binaries ("vert", "geometry",
//                  "fragment") and logging.
// format         - format of the colour attachment / result image.
// extraData      - array of extraDataCount input descriptions (buffer or image).
// extraDataCount - number of entries in extraData.
// checkResult    - callback that validates one iteration's read-back data.
//
// Returns TestStatus::pass("OK") when no iteration failed, otherwise logs how many
// iterations passed and returns TestStatus::fail("Failed!").
1698 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
1699 Context& context, VkFormat format, SSBOData* extraData,
1700 deUint32 extraDataCount,
1701 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
// Upper bound on the render width; also the width of the attachment image.
1703 const deUint32 maxWidth = 1024u;
1704 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
1705 DescriptorSetLayoutBuilder layoutBuilder;
1706 DescriptorPoolBuilder poolBuilder;
1707 DescriptorSetUpdateBuilder updateBuilder;
1708 Move <VkDescriptorPool> descriptorPool;
1709 Move <VkDescriptorSet> descriptorSet;
// Shader modules are created from the pre-built binaries registered under fixed names.
1711 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1712 context.getBinaryCollection().get("vert"), 0u));
1713 const Unique<VkShaderModule> geometryShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1714 context.getBinaryCollection().get("geometry"), 0u));
1715 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1716 context.getBinaryCollection().get("fragment"), 0u));
1717 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
// Vertex input: one tightly packed vec4 per vertex.
1718 const VkVertexInputBindingDescription vertexInputBinding =
1721 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
1722 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
1725 const VkVertexInputAttributeDescription vertexInputAttribute =
1729 VK_FORMAT_R32G32B32A32_SFLOAT,
// Create and initialise one image or uniform buffer per extra input.
1733 for (deUint32 i = 0u; i < extraDataCount; i++)
1735 if (extraData[i].isImage)
1737 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
1741 vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
1742 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
1744 const Allocation& alloc = inputBuffers[i]->getAllocation();
1745 initializeMemory(context, alloc, extraData[i]);
// Bindings are added in input order and are visible to the geometry stage only.
1748 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1749 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
1751 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
1753 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(context, *descriptorSetLayout));
// Points are the input primitive; no tessellation modules are supplied.
1755 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
1756 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
1757 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
1758 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
1760 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1761 poolBuilder.addType(inputBuffers[ndx]->getType());
// A descriptor pool and set are only created when there is at least one extra input.
1763 if (extraDataCount > 0)
1765 descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
1766 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1767 descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
// Each input is written to the binding whose number equals its index.
1770 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
1772 if (inputBuffers[buffersNdx]->isImage())
1774 VkDescriptorImageInfo info =
1775 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
1776 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
1778 updateBuilder.writeSingle(*descriptorSet,
1779 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1780 inputBuffers[buffersNdx]->getType(), &info);
1784 VkDescriptorBufferInfo info =
1785 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
1786 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
1788 updateBuilder.writeSingle(*descriptorSet,
1789 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1790 inputBuffers[buffersNdx]->getType(), &info);
1794 updateBuilder.update(context.getDeviceInterface(), context.getDevice());
1796 const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
1797 const deUint32 subgroupSize = getSubgroupSize(context);
1798 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
// One vertex (point) per pixel column.
1799 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
1800 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
1801 unsigned totalIterations = 0u;
1802 unsigned failedIterations = 0u;
1803 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1806 const Allocation& alloc = vertexBuffer.getAllocation();
1807 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
// Width of one pixel in normalized device coordinates (the [-1, 1] range split into maxWidth).
1808 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
1809 float leftHandPosition = -1.0f;
// Each point is placed at the horizontal centre of its pixel.
1811 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
1813 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
1814 leftHandPosition += pixelSize;
1817 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
1818 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
// One draw + read-back + check per width in [1, maxWidth).
// NOTE(review): the framebuffer, viewport and scissor below do not depend on
// `width` yet are rebuilt every iteration - candidates for hoisting.
1821 for (deUint32 width = 1u; width < maxWidth; width++)
1824 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
1825 const VkViewport viewport = makeViewport(maxWidth, 1u);
1826 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
1827 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
1828 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1829 const VkDeviceSize vertexBufferOffset = 0u;
// Input buffers are re-initialised before every iteration.
1831 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
1833 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
1834 initializeMemory(context, alloc, extraData[ndx]);
1837 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1839 context.getDeviceInterface().cmdSetViewport(
1840 *cmdBuffer, 0, 1, &viewport);
1842 context.getDeviceInterface().cmdSetScissor(
1843 *cmdBuffer, 0, 1, &scissor);
1845 beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
1847 context.getDeviceInterface().cmdBindPipeline(
1848 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
// descriptorSet is only valid when extra inputs exist (see creation above).
1850 if (extraDataCount > 0)
1852 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
1853 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
1854 &descriptorSet.get(), 0u, DE_NULL);
1857 context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
// Draws the first `width` points of the vertex buffer.
1859 context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
1861 endRenderPass(context.getDeviceInterface(), *cmdBuffer);
// Read the whole attachment row back into a host-visible buffer.
1863 copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
1865 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1866 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
1867 waitFence(context, fence);
1871 const Allocation& allocResult = imageBufferResult.getAllocation();
1872 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
1874 std::vector<const void*> datas;
1875 datas.push_back(allocResult.getHostPtr());
// The callback validates the first `width` results against the subgroup size.
1876 if (!checkResult(datas, width, subgroupSize))
// Any failed iteration fails the test; the log reports how many iterations passed.
1881 if (0 < failedIterations)
1883 context.getTestContext().getLog()
1884 << TestLog::Message << (totalIterations - failedIterations) << " / "
1885 << totalIterations << " values passed" << TestLog::EndMessage;
1886 return tcu::TestStatus::fail("Failed!");
1889 return tcu::TestStatus::pass("OK");
1893 tcu::TestStatus vkt::subgroups::allStages(
1894 Context& context, VkFormat format, SSBOData* extraDatas,
1895 deUint32 extraDatasCount,
1896 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
1897 const VkShaderStageFlags shaderStageTested)
1899 const deUint32 maxWidth = 1024u;
1900 vector<VkShaderStageFlagBits> stagesVector;
1901 VkShaderStageFlags shaderStageRequired = (VkShaderStageFlags)0ull;
1903 Move<VkShaderModule> vertexShaderModule;
1904 Move<VkShaderModule> teCtrlShaderModule;
1905 Move<VkShaderModule> teEvalShaderModule;
1906 Move<VkShaderModule> geometryShaderModule;
1907 Move<VkShaderModule> fragmentShaderModule;
1909 if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
1911 stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
1913 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1915 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
1916 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
1917 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
1919 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1921 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
1922 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
1923 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
1925 if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
1927 stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
1928 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
1929 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
1931 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
1933 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
1934 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
1937 const deUint32 stagesCount = static_cast<deUint32>(stagesVector.size());
1938 const string vert = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
1939 const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
1940 const string tese = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";
1942 shaderStageRequired = shaderStageTested | shaderStageRequired;
1944 vertexShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(vert), 0u);
1945 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1947 teCtrlShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tesc), 0u);
1948 teEvalShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tese), 0u);
1950 if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
1952 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1954 // tessellation shaders output line primitives
1955 geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_lines"), 0u);
1959 // otherwise points are processed by geometry shader
1960 geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_points"), 0u);
1963 if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
1964 fragmentShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u);
1966 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
1968 DescriptorSetLayoutBuilder layoutBuilder;
1969 // The implicit result SSBO we use to store our outputs from the shader
1970 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
1972 const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
1973 const VkDeviceSize size = getFormatSizeInBytes(format) * shaderSize;
1974 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
1976 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
1979 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
1981 const deUint32 datasNdx = ndx - stagesCount;
1982 if (extraDatas[datasNdx].isImage)
1984 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
1988 const vk::VkDeviceSize size = getFormatSizeInBytes(extraDatas[datasNdx].format) * extraDatas[datasNdx].numElements;
1989 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
1992 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
1993 initializeMemory(context, alloc, extraDatas[datasNdx]);
1995 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
1996 extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
1999 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2000 layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2002 const Unique<VkPipelineLayout> pipelineLayout(
2003 makePipelineLayout(context, *descriptorSetLayout));
2005 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2006 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
2007 shaderStageRequired,
2008 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
2010 (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
2012 DescriptorPoolBuilder poolBuilder;
2014 for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
2016 poolBuilder.addType(inputBuffers[ndx]->getType());
2019 const Unique<VkDescriptorPool> descriptorPool(
2020 poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2021 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2023 // Create descriptor set
2024 const Unique<VkDescriptorSet> descriptorSet(
2025 makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
2027 DescriptorSetUpdateBuilder updateBuilder;
2029 for (deUint32 ndx = 0u; ndx < stagesCount; ndx++)
2031 if (inputBuffers[ndx]->isImage())
2033 VkDescriptorImageInfo info =
2034 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
2035 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2037 updateBuilder.writeSingle(*descriptorSet,
2038 DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])),
2039 inputBuffers[ndx]->getType(), &info);
2043 VkDescriptorBufferInfo info =
2044 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
2045 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
2047 updateBuilder.writeSingle(*descriptorSet,
2048 DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])),
2049 inputBuffers[ndx]->getType(), &info);
2053 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ndx++)
2055 if (inputBuffers[ndx]->isImage())
2057 VkDescriptorImageInfo info =
2058 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
2059 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2061 updateBuilder.writeSingle(*descriptorSet,
2062 DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx -stagesCount].binding),
2063 inputBuffers[ndx]->getType(), &info);
2067 VkDescriptorBufferInfo info =
2068 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
2069 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
2071 updateBuilder.writeSingle(*descriptorSet,
2072 DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx - stagesCount].binding),
2073 inputBuffers[ndx]->getType(), &info);
2076 updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2079 const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
2080 const deUint32 subgroupSize = getSubgroupSize(context);
2081 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
2082 unsigned totalIterations = 0u;
2083 unsigned failedIterations = 0u;
2084 Image resultImage (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2085 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, resultImage.getImageView(), maxWidth, 1));
2086 const VkViewport viewport = makeViewport(maxWidth, 1u);
2087 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
2088 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2089 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2090 const VkImageSubresourceRange subresourceRange =
2092 VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
2093 0u, //deUint32 baseMipLevel
2094 1u, //deUint32 levelCount
2095 0u, //deUint32 baseArrayLayer
2096 1u //deUint32 layerCount
2099 const VkImageMemoryBarrier colorAttachmentBarrier = makeImageMemoryBarrier(
2100 (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
2101 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2102 resultImage.getImage(), subresourceRange);
2104 for (deUint32 width = 1u; width < maxWidth; width++)
2106 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
2109 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2110 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
2115 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2117 context.getDeviceInterface().cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
2119 context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2121 context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2123 beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2125 context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2127 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2128 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2129 &descriptorSet.get(), 0u, DE_NULL);
2131 context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1, 0, 0);
2133 endRenderPass(context.getDeviceInterface(), *cmdBuffer);
2135 copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2137 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2139 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2140 waitFence(context, fence);
2142 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
2144 std::vector<const void*> datas;
2145 if (!inputBuffers[ndx]->isImage())
2147 const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
2148 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2149 // we always have our result data first
2150 datas.push_back(resultAlloc.getHostPtr());
2153 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2155 const deUint32 datasNdx = index - stagesCount;
2156 if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
2158 const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
2159 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2160 // we always have our result data first
2161 datas.push_back(resultAlloc.getHostPtr());
2165 if (!checkResult(datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
2168 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
2170 std::vector<const void*> datas;
2171 const Allocation& resultAlloc = imageBufferResult.getAllocation();
2172 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2174 // we always have our result data first
2175 datas.push_back(resultAlloc.getHostPtr());
2177 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2179 const deUint32 datasNdx = index - stagesCount;
2180 if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
2182 const Allocation& alloc = inputBuffers[index]->getAllocation();
2183 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2184 // we always have our result data first
2185 datas.push_back(alloc.getHostPtr());
2189 if (!checkResult(datas, width , subgroupSize))
2193 context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
2196 if (0 < failedIterations)
2198 context.getTestContext().getLog()
2199 << TestLog::Message << (totalIterations - failedIterations) << " / "
2200 << totalIterations << " values passed" << TestLog::EndMessage;
2201 return tcu::TestStatus::fail("Failed!");
2205 return tcu::TestStatus::pass("OK");
// Vertex-stage framebuffer test driver.
//
// Builds a point-list graphics pipeline from the "vert" and "fragment" binaries,
// exposes every entry of extraData to the vertex stage through one descriptor set,
// then records one draw per width in [1, maxWidth) and passes the read-back colour
// data to checkResult.
//
// context        - supplies the device interface, device, shader binaries and log.
// format         - format used for the render pass, the colour image and the read-back size.
// extraData      - array of extraDataCount input descriptions; the descriptor for entry i
//                  is written to binding i.
// extraDataCount - number of entries in extraData; 0 skips descriptor pool/set creation.
// checkResult    - called once per width with datas[0] = read-back pointer, the draw width
//                  and the subgroup size. A false return marks the iteration as failed
//                  (presumably by bumping failedIterations; that statement is not visible
//                  in this excerpt).
//
// Returns fail("Failed!") after logging passed/total counts when failedIterations is
// non-zero, otherwise pass("OK").
2208 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
2209 SSBOData* extraData, deUint32 extraDataCount,
2210 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
// Width of the framebuffer and number of vertices in the vertex buffer; the draw loop
// below stops one short of it.
2212 const deUint32 maxWidth = 1024u;
2213 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
2214 DescriptorSetLayoutBuilder layoutBuilder;
2215 const Unique<VkShaderModule> vertexShaderModule (createShaderModule
2216 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
2217 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule
2218 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
2219 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
// Vertex input: a single per-vertex stream whose stride is one tcu::Vec4.
2221 const VkVertexInputBindingDescription vertexInputBinding =
2224 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
2225 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
// The attribute reads that stream as four 32-bit floats.
2228 const VkVertexInputAttributeDescription vertexInputAttribute =
2232 VK_FORMAT_R32G32B32A32_SFLOAT,
// Create the backing image or buffer for each extra input and fill it.
2236 for (deUint32 i = 0u; i < extraDataCount; i++)
2238 if (extraData[i].isImage)
// Images are numElements wide and one texel high.
2240 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
// Buffers hold numElements values of the input's format and are created with uniform-buffer usage.
2244 vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
2245 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2247 const Allocation& alloc = inputBuffers[i]->getAllocation();
2248 initializeMemory(context, alloc, extraData[i]);
// One binding per input, visible to the vertex stage only.
2251 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2252 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
2254 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2256 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(context, *descriptorSetLayout));
// Vertex + fragment stages only; the three DE_NULL arguments sit where the other
// pipeline builds in this file pass geometry/tessellation modules.
2258 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
2259 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
2260 *vertexShaderModule, *fragmentShaderModule,
2261 DE_NULL, DE_NULL, DE_NULL,
2262 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
2263 &vertexInputBinding, &vertexInputAttribute, true, format));
2264 DescriptorPoolBuilder poolBuilder;
2265 DescriptorSetUpdateBuilder updateBuilder;
// Register one pool entry per input descriptor type.
2268 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2269 poolBuilder.addType(inputBuffers[ndx]->getType());
2271 Move <VkDescriptorPool> descriptorPool;
2272 Move <VkDescriptorSet> descriptorSet;
// The pool and set are only created when there is at least one input to bind.
2274 if (extraDataCount > 0)
2276 descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2277 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2278 descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
// NOTE(review): the inputs were already initialised in the creation loop above and are
// initialised again at the top of every draw iteration; this pass looks redundant.
2281 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2283 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2284 initializeMemory(context, alloc, extraData[ndx]);
// Queue one descriptor write per input, targeting the binding equal to its index.
2287 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2289 if (inputBuffers[buffersNdx]->isImage())
2291 VkDescriptorImageInfo info =
2292 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2293 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2295 updateBuilder.writeSingle(*descriptorSet,
2296 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2297 inputBuffers[buffersNdx]->getType(), &info);
// Buffer inputs expose their whole range, offset 0 up to getSize().
2301 VkDescriptorBufferInfo info =
2302 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2303 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2305 updateBuilder.writeSingle(*descriptorSet,
2306 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2307 inputBuffers[buffersNdx]->getType(), &info);
// Called unconditionally; with no inputs nothing has been queued by the loop above.
2310 updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2312 const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
2314 const deUint32 subgroupSize = getSubgroupSize(context);
2316 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
// Vertex buffer with room for maxWidth Vec4 positions.
2318 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
2319 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2321 unsigned totalIterations = 0u;
2322 unsigned failedIterations = 0u;
// Colour target; TRANSFER_SRC usage allows it to be copied out after each draw.
2324 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
// Fill the vertex buffer: vertex ndx gets x = -1 + (ndx + 0.5) * (2 / maxWidth), i.e. the
// midpoint of cell ndx when [-1, 1] is split into maxWidth equal cells; y, z and w stay 1.0.
2327 const Allocation& alloc = vertexBuffer.getAllocation();
2328 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2329 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
2330 float leftHandPosition = -1.0f;
2332 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2334 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2335 leftHandPosition += pixelSize;
// Copy to the mapped allocation and flush so the writes reach the device.
2338 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2339 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
// One draw per width; width is the vertex count of the draw. The bound is exclusive,
// so maxWidth itself is never drawn.
2342 for (deUint32 width = 1u; width < maxWidth; width++)
// NOTE(review): none of the objects below depend on width, yet they are rebuilt on
// every iteration; they look hoistable out of the loop.
2345 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
2346 const VkViewport viewport = makeViewport(maxWidth, 1u);
2347 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
2348 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2349 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2350 const VkDeviceSize vertexBufferOffset = 0u;
// Re-initialise every input so each iteration starts from the same contents.
2352 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2354 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2355 initializeMemory(context, alloc, extraData[ndx]);
// Record: dynamic state, render pass cleared to 0, pipeline, descriptors, vertex buffer, draw.
2358 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2360 context.getDeviceInterface().cmdSetViewport(
2361 *cmdBuffer, 0, 1, &viewport);
2363 context.getDeviceInterface().cmdSetScissor(
2364 *cmdBuffer, 0, 1, &scissor);
2366 beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2368 context.getDeviceInterface().cmdBindPipeline(
2369 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
// descriptorSet only holds a valid handle when inputs exist (see creation above).
2371 if (extraDataCount > 0)
2373 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2374 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2375 &descriptorSet.get(), 0u, DE_NULL);
2378 context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
// Draw the first `width` vertices as points, single instance.
2380 context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2382 endRenderPass(context.getDeviceInterface(), *cmdBuffer);
// The whole maxWidth x 1 row is copied out regardless of how many points were drawn.
2384 copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2386 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
// Submit and wait on the returned fence before reading anything on the host.
2387 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2388 waitFence(context, fence);
// Invalidate the read-back allocation, then let the callback judge this width.
// Only the read-back pointer is passed; the extra inputs are not added to datas here.
2392 const Allocation& allocResult = imageBufferResult.getAllocation();
2393 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
2395 std::vector<const void*> datas;
2396 datas.push_back(allocResult.getHostPtr());
2397 if (!checkResult(datas, width, subgroupSize))
// Report: log how many iterations passed out of the total when any failed.
2402 if (0 < failedIterations)
2404 context.getTestContext().getLog()
2405 << TestLog::Message << (totalIterations - failedIterations) << " / "
2406 << totalIterations << " values passed" << TestLog::EndMessage;
2407 return tcu::TestStatus::fail("Failed!");
2410 return tcu::TestStatus::pass("OK");
// Fragment-stage framebuffer test driver.
//
// Builds a triangle-strip graphics pipeline from the "vert" and "fragment" binaries,
// exposes every entry of extraDatas to the fragment stage, and for each (width, height)
// pair renders a 4-vertex draw into a fresh width x height colour image, copies it to a
// buffer and passes that buffer to checkResult.
//
// context         - supplies the device interface, device, shader binaries and log.
// format          - format of the render pass and of each result image.
// extraDatas      - array of extraDatasCount input descriptions; the descriptor for
//                   entry i is written to binding i.
// extraDatasCount - number of entries in extraDatas; 0 skips descriptor pool/set
//                   creation, the descriptor update and the bind.
// checkResult     - called once per (width, height) with datas[0] = read-back pointer,
//                   the image dimensions and the subgroup size. A false return marks the
//                   iteration as failed (presumably by bumping failedIterations; that
//                   statement is not visible in this excerpt).
//
// Returns fail("Failed!") after logging passed/total counts when failedIterations is
// non-zero, otherwise pass("OK").
2414 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context& context, VkFormat format, SSBOData* extraDatas,
2415 deUint32 extraDatasCount,
2416 bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
2417 deUint32 height, deUint32 subgroupSize))
2419 const Unique<VkShaderModule> vertexShaderModule (createShaderModule
2420 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
2421 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule
2422 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
2424 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
// Create the backing image or buffer for each extra input and fill it.
2426 for (deUint32 i = 0; i < extraDatasCount; i++)
2428 if (extraDatas[i].isImage)
// Images are numElements wide and one texel high.
2430 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
2431 static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
// Buffers hold numElements values of the input's format and are created with uniform-buffer usage.
2435 vk::VkDeviceSize size =
2436 getFormatSizeInBytes(extraDatas[i].format) * extraDatas[i].numElements;
2437 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2440 const Allocation& alloc = inputBuffers[i]->getAllocation();
2441 initializeMemory(context, alloc, extraDatas[i]);
2444 DescriptorSetLayoutBuilder layoutBuilder;
// One binding per input, visible to the fragment stage only.
2446 for (deUint32 i = 0; i < extraDatasCount; i++)
2448 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
2449 VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
2452 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2453 layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2455 const Unique<VkPipelineLayout> pipelineLayout(
2456 makePipelineLayout(context, *descriptorSetLayout));
2458 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
// Vertex + fragment stages only, no vertex input descriptions (both DE_NULL).
2459 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
2460 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
2461 *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
2462 DE_NULL, DE_NULL, true));
2464 DescriptorPoolBuilder poolBuilder;
2466 // To stop validation complaining, always add at least one type to pool.
2467 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2468 for (deUint32 i = 0; i < extraDatasCount; i++)
2470 poolBuilder.addType(inputBuffers[i]->getType());
2473 Move<VkDescriptorPool> descriptorPool;
2474 // Create descriptor set
2475 Move<VkDescriptorSet> descriptorSet;
// The pool and set are only created when there is at least one input to bind.
2477 if (extraDatasCount > 0)
2479 descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2480 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2482 descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
2485 DescriptorSetUpdateBuilder updateBuilder;
// Queue one descriptor write per input, targeting the binding equal to its index.
2487 for (deUint32 i = 0; i < extraDatasCount; i++)
2489 if (inputBuffers[i]->isImage())
2491 VkDescriptorImageInfo info =
2492 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
2493 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2495 updateBuilder.writeSingle(*descriptorSet,
2496 DescriptorSetUpdateBuilder::Location::binding(i),
2497 inputBuffers[i]->getType(), &info);
// Buffer inputs expose their whole range, offset 0 up to getSize().
2501 VkDescriptorBufferInfo info =
2502 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
2503 0ull, inputBuffers[i]->getAsBuffer()->getSize());
2505 updateBuilder.writeSingle(*descriptorSet,
2506 DescriptorSetUpdateBuilder::Location::binding(i),
2507 inputBuffers[i]->getType(), &info);
2511 if (extraDatasCount > 0)
2512 updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2514 const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
2516 const deUint32 subgroupSize = getSubgroupSize(context);
2518 const Unique<VkCommandBuffer> cmdBuffer(
2519 makeCommandBuffer(context, *cmdPool));
2521 unsigned totalIterations = 0;
2522 unsigned failedIterations = 0;
// width and height each start at 8 and double while they stay <= subgroupSize.
// NOTE(review): when subgroupSize is below 8 neither loop body runs, failedIterations
// stays 0 and the function reports pass without having rendered anything.
2524 for (deUint32 width = 8; width <= subgroupSize; width *= 2)
2526 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
// Re-initialise every input so each iteration starts from the same contents.
2531 for (deUint32 i = 0; i < extraDatasCount; i++)
2533 const Allocation& alloc = inputBuffers[i]->getAllocation();
2534 initializeMemory(context, alloc, extraDatas[i]);
// Read-back size: one element of `format` per pixel.
2537 VkDeviceSize formatSize = getFormatSizeInBytes(format);
2538 const VkDeviceSize resultImageSizeInBytes =
2539 width * height * formatSize;
// Per-iteration colour target; TRANSFER_SRC usage allows it to be copied out.
2541 Image resultImage(context, width, height, format,
2542 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
2543 VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
// NOTE(review): an *image* usage flag is passed as the usage of a Buffer here, while
// every other Buffer in this function takes a VK_BUFFER_USAGE_* flag. This looks like
// it should be VK_BUFFER_USAGE_TRANSFER_DST_BIT; confirm against Buffer's constructor.
2545 Buffer resultBuffer(context, resultImageSizeInBytes,
2546 VK_IMAGE_USAGE_TRANSFER_DST_BIT);
2548 const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
2549 *renderPass, resultImage.getImageView(), width, height));
// Record: dynamic state, render pass cleared to 0, pipeline, descriptors, draw.
2551 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2553 VkViewport viewport = makeViewport(width, height);
2555 context.getDeviceInterface().cmdSetViewport(
2556 *cmdBuffer, 0, 1, &viewport);
2558 VkRect2D scissor = {{0, 0}, {width, height}};
2560 context.getDeviceInterface().cmdSetScissor(
2561 *cmdBuffer, 0, 1, &scissor);
2563 beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
2565 context.getDeviceInterface().cmdBindPipeline(
2566 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
// descriptorSet only holds a valid handle when inputs exist (see creation above).
2568 if (extraDatasCount > 0)
2570 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2571 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2572 &descriptorSet.get(), 0u, DE_NULL);
// Four vertices of a triangle strip, single instance; positions come from the "vert"
// shader since no vertex buffer is bound (presumably a viewport-covering quad; confirm
// against the shader source).
2575 context.getDeviceInterface().cmdDraw(*cmdBuffer, 4, 1, 0, 0);
2577 endRenderPass(context.getDeviceInterface(), *cmdBuffer);
2579 copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2581 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
// Submit and wait on the returned fence before reading anything on the host.
2583 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2585 waitFence(context, fence);
// Only the read-back buffer is handed to the callback; the extra inputs are not added.
2587 std::vector<const void*> datas;
2589 const Allocation& resultAlloc = resultBuffer.getAllocation();
2590 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2592 // we always have our result data first
2593 datas.push_back(resultAlloc.getHostPtr());
2596 if (!checkResult(datas, width, height, subgroupSize))
// Reset explicitly so the command buffer can be recorded again next iteration.
2601 context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
// Report: log how many iterations passed out of the total when any failed.
2605 if (0 < failedIterations)
2607 context.getTestContext().getLog()
2608 << TestLog::Message << (totalIterations - failedIterations) << " / "
2609 << totalIterations << " values passed" << TestLog::EndMessage;
2610 return tcu::TestStatus::fail("Failed!");
2613 return tcu::TestStatus::pass("OK");
// Compute-stage test driver.
//
// Binds a result buffer at binding 0 and each entry of inputs at binding i + 1, then
// dispatches the "comp" shader once per tested local size with a fixed 4 x 2 x 2
// workgroup grid and passes the result and buffer inputs to checkResult.
//
// context     - supplies the device interface, device, shader binaries and log.
// format      - element format of the result buffer (sets the per-element byte size).
// inputs      - array of inputsCount input descriptions (images or buffers).
// inputsCount - number of entries in inputs.
// checkResult - called once per dispatch with datas[0] = result pointer followed by the
//               host pointers of the non-image inputs, the workgroup counts, the local
//               size that dispatch used and the subgroup size. A false return marks the
//               iteration as failed (presumably by bumping failedIterations; that
//               statement is not visible in this excerpt).
//
// Returns fail("Failed!") after logging passed/total counts when failedIterations is
// non-zero, otherwise pass("OK").
2616 tcu::TestStatus vkt::subgroups::makeComputeTest(
2617 Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
2618 bool (*checkResult)(std::vector<const void*> datas,
2619 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
2620 deUint32 subgroupSize))
2622 VkDeviceSize elementSize = getFormatSizeInBytes(format);
// The result buffer holds maxSupportedSubgroupSize()^3 elements of `format`.
2624 const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
2625 maxSupportedSubgroupSize() *
2626 maxSupportedSubgroupSize();
2627 const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
2629 Buffer resultBuffer(
2630 context, resultBufferSizeInBytes);
2632 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
// Create the backing image or buffer for each input and fill it once. Unlike the
// dispatch loop's command recording, this initialisation is not repeated per iteration.
2634 for (deUint32 i = 0; i < inputsCount; i++)
2636 if (inputs[i].isImage)
// Images are numElements wide and one texel high.
2638 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
2639 static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
// Buffers hold numElements values of the input's format; no usage argument is passed.
2643 vk::VkDeviceSize size =
2644 getFormatSizeInBytes(inputs[i].format) * inputs[i].numElements;
2645 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2648 const Allocation& alloc = inputBuffers[i]->getAllocation();
2649 initializeMemory(context, alloc, inputs[i]);
// Layout: the result buffer is added first, then the inputs in order, all compute-only.
2652 DescriptorSetLayoutBuilder layoutBuilder;
2653 layoutBuilder.addBinding(
2654 resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
2656 for (deUint32 i = 0; i < inputsCount; i++)
2658 layoutBuilder.addBinding(
2659 inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
2662 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2663 layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2665 const Unique<VkShaderModule> shaderModule(
2666 createShaderModule(context.getDeviceInterface(), context.getDevice(),
2667 context.getBinaryCollection().get("comp"), 0u));
2668 const Unique<VkPipelineLayout> pipelineLayout(
2669 makePipelineLayout(context, *descriptorSetLayout));
// Pool entries mirror the layout: result buffer type plus one per input.
2671 DescriptorPoolBuilder poolBuilder;
2673 poolBuilder.addType(resultBuffer.getType());
2675 for (deUint32 i = 0; i < inputsCount; i++)
2677 poolBuilder.addType(inputBuffers[i]->getType());
2680 const Unique<VkDescriptorPool> descriptorPool(
2681 poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2682 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2684 // Create descriptor set
2685 const Unique<VkDescriptorSet> descriptorSet(
2686 makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
2688 DescriptorSetUpdateBuilder updateBuilder;
// Binding 0: the whole result buffer, written as a storage buffer.
2690 const VkDescriptorBufferInfo resultDescriptorInfo =
2691 makeDescriptorBufferInfo(
2692 resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
2694 updateBuilder.writeSingle(*descriptorSet,
2695 DescriptorSetUpdateBuilder::Location::binding(0u),
2696 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
// Bindings 1..inputsCount: input i is written to binding i + 1.
2698 for (deUint32 i = 0; i < inputsCount; i++)
2700 if (inputBuffers[i]->isImage())
2702 VkDescriptorImageInfo info =
2703 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
2704 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2706 updateBuilder.writeSingle(*descriptorSet,
2707 DescriptorSetUpdateBuilder::Location::binding(i + 1),
2708 inputBuffers[i]->getType(), &info);
// The descriptor range is recomputed with the same formula used to size the buffer.
2712 vk::VkDeviceSize size =
2713 getFormatSizeInBytes(inputs[i].format) * inputs[i].numElements;
2714 VkDescriptorBufferInfo info =
2715 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
2717 updateBuilder.writeSingle(*descriptorSet,
2718 DescriptorSetUpdateBuilder::Location::binding(i + 1),
2719 inputBuffers[i]->getType(), &info);
2723 updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2725 const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
2727 unsigned totalIterations = 0;
2728 unsigned failedIterations = 0;
2730 const deUint32 subgroupSize = getSubgroupSize(context);
2732 const Unique<VkCommandBuffer> cmdBuffer(
2733 makeCommandBuffer(context, *cmdPool));
// Every dispatch uses the same 4 x 2 x 2 grid of workgroups.
2735 const deUint32 numWorkgroups[3] = {4, 2, 2};
// Table of local sizes (x, y, z). It has 15 rows, of which only the subgroupSize rows
// and the trailing sentinel are visible in this excerpt. The dispatch loop runs
// localSizesToTestCount - 1 times, so the last row is only ever used to build a
// "next" pipeline that is never dispatched.
2737 const deUint32 localSizesToTestCount = 15;
2738 deUint32 localSizesToTest[localSizesToTestCount][3] =
2747 {subgroupSize, 1, 1},
2748 {1, subgroupSize, 1},
2749 {1, 1, subgroupSize},
2754 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
// Pipeline for row 0, built up front. Inside the loop lastPipeline always corresponds
// to localSizesToTest[index].
2757 Move<VkPipeline> lastPipeline(
2758 makeComputePipeline(context, *pipelineLayout, *shaderModule,
2759 localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]));
2761 for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
// Local size of the following row, used for the pipeline built after the submit below.
2763 const deUint32 nextX = localSizesToTest[index + 1][0];
2764 const deUint32 nextY = localSizesToTest[index + 1][1];
2765 const deUint32 nextZ = localSizesToTest[index + 1][2];
2767 // we are running one test
2770 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2772 context.getDeviceInterface().cmdBindPipeline(
2773 *cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *lastPipeline);
2775 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2776 VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
2777 &descriptorSet.get(), 0u, DE_NULL);
2779 context.getDeviceInterface().cmdDispatch(*cmdBuffer,
2780 numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
2782 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2784 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
// The next row's pipeline is created between the submit and the fence wait, so pipeline
// creation happens before the host blocks on the current dispatch.
2786 Move<VkPipeline> nextPipeline(
2787 makeComputePipeline(context, *pipelineLayout, *shaderModule,
2788 nextX, nextY, nextZ));
2790 waitFence(context, fence);
2792 std::vector<const void*> datas;
// Invalidate before reading on the host.
2795 const Allocation& resultAlloc = resultBuffer.getAllocation();
2796 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2798 // we always have our result data first
2799 datas.push_back(resultAlloc.getHostPtr());
// Image inputs are skipped, so the positions in datas after the result pointer follow
// the order of the buffer inputs only.
2802 for (deUint32 i = 0; i < inputsCount; i++)
2804 if (!inputBuffers[i]->isImage())
2806 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
2807 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2809 // buffer inputs follow the result pointer, in input order
2810 datas.push_back(resultAlloc.getHostPtr());
// The callback receives the local size that the just-finished dispatch used.
2814 if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
// Reset explicitly so the command buffer can be recorded again next iteration.
2819 context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
// Promote the prebuilt pipeline for the next iteration.
2821 lastPipeline = nextPipeline;
// Report: log how many iterations passed out of the total when any failed.
2824 if (0 < failedIterations)
2826 context.getTestContext().getLog()
2827 << TestLog::Message << (totalIterations - failedIterations) << " / "
2828 << totalIterations << " values passed" << TestLog::EndMessage;
2829 return tcu::TestStatus::fail("Failed!");
2832 return tcu::TestStatus::pass("OK");