1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Google Inc.
7 * Copyright (c) 2017 Codeplay Software Ltd.
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
23 * \brief Subgroups Tests Utils
24 */ /*--------------------------------------------------------------------*/
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "deFloat16.h"
28 #include "deRandom.hpp"
29 #include "tcuCommandLine.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "vkBarrierUtil.hpp"
32 #include "vkImageUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkObjUtil.hpp"
// Presumably the largest width the tests in this file iterate up to.
// NOTE(review): the function body is not visible in this listing — confirm the
// actual limit against the full source before relying on it.
44 deUint32 getMaxWidth ()
// Returns the width to try after `width`.
// According to the comments inside the body, every value up to 128 is visited
// and beyond that only powers of two are used to keep the run time down.
// NOTE(review): the branch condition and the return statements are not visible
// in this listing — confirm the exact stepping against the full source.
49 deUint32 getNextWidth (const deUint32 width)
53 // This ensures we test every value up to 128 (the max subgroup size).
58 // And once we hit 128 we increment to only power of 2's to reduce testing time.
// Returns the size in bytes that one element of `format` is given by these tests.
//
// Notes on the mapping below:
//  - three-component formats share the return of the four-component ones, so
//    a vec3-style element is sized like a vec4-style element;
//  - the *_USCALED formats stand in for bool/bvec* and are sized as 32-bit
//    integers, not as 8-bit values.
// NOTE(review): the switch header, the label owning the DE_FATAL below
// (presumably `default:`) and its fallback return are not visible in this listing.
63 deUint32 getFormatSizeInBytes(const VkFormat format)
68 DE_FATAL("Unhandled format!");
// 8-bit integer formats.
70 case VK_FORMAT_R8_SINT:
71 case VK_FORMAT_R8_UINT:
72 return static_cast<deUint32>(sizeof(deInt8));
73 case VK_FORMAT_R8G8_SINT:
74 case VK_FORMAT_R8G8_UINT:
75 return static_cast<deUint32>(sizeof(deInt8) * 2);
// Three components are sized like four.
76 case VK_FORMAT_R8G8B8_SINT:
77 case VK_FORMAT_R8G8B8_UINT:
78 case VK_FORMAT_R8G8B8A8_SINT:
79 case VK_FORMAT_R8G8B8A8_UINT:
80 return static_cast<deUint32>(sizeof(deInt8) * 4);
// 16-bit integer and float formats.
81 case VK_FORMAT_R16_SINT:
82 case VK_FORMAT_R16_UINT:
83 case VK_FORMAT_R16_SFLOAT:
84 return static_cast<deUint32>(sizeof(deInt16));
85 case VK_FORMAT_R16G16_SINT:
86 case VK_FORMAT_R16G16_UINT:
87 case VK_FORMAT_R16G16_SFLOAT:
88 return static_cast<deUint32>(sizeof(deInt16) * 2);
89 case VK_FORMAT_R16G16B16_UINT:
90 case VK_FORMAT_R16G16B16_SINT:
91 case VK_FORMAT_R16G16B16_SFLOAT:
92 case VK_FORMAT_R16G16B16A16_SINT:
93 case VK_FORMAT_R16G16B16A16_UINT:
94 case VK_FORMAT_R16G16B16A16_SFLOAT:
95 return static_cast<deUint32>(sizeof(deInt16) * 4);
// 32-bit integer and float formats.
96 case VK_FORMAT_R32_SINT:
97 case VK_FORMAT_R32_UINT:
98 case VK_FORMAT_R32_SFLOAT:
99 return static_cast<deUint32>(sizeof(deInt32));
100 case VK_FORMAT_R32G32_SINT:
101 case VK_FORMAT_R32G32_UINT:
102 case VK_FORMAT_R32G32_SFLOAT:
103 return static_cast<deUint32>(sizeof(deInt32) * 2);
104 case VK_FORMAT_R32G32B32_SINT:
105 case VK_FORMAT_R32G32B32_UINT:
106 case VK_FORMAT_R32G32B32_SFLOAT:
107 case VK_FORMAT_R32G32B32A32_SINT:
108 case VK_FORMAT_R32G32B32A32_UINT:
109 case VK_FORMAT_R32G32B32A32_SFLOAT:
110 return static_cast<deUint32>(sizeof(deInt32) * 4);
// 64-bit integer and float formats.
111 case VK_FORMAT_R64_SINT:
112 case VK_FORMAT_R64_UINT:
113 case VK_FORMAT_R64_SFLOAT:
114 return static_cast<deUint32>(sizeof(deInt64));
115 case VK_FORMAT_R64G64_SINT:
116 case VK_FORMAT_R64G64_UINT:
117 case VK_FORMAT_R64G64_SFLOAT:
118 return static_cast<deUint32>(sizeof(deInt64) * 2);
119 case VK_FORMAT_R64G64B64_SINT:
120 case VK_FORMAT_R64G64B64_UINT:
121 case VK_FORMAT_R64G64B64_SFLOAT:
122 case VK_FORMAT_R64G64B64A64_SINT:
123 case VK_FORMAT_R64G64B64A64_UINT:
124 case VK_FORMAT_R64G64B64A64_SFLOAT:
125 return static_cast<deUint32>(sizeof(deInt64) * 4);
126 // The below formats are used to represent bool and bvec* types. These
127 // types are passed to the shader as int and ivec* types, before the
128 // calculations are done as booleans. We need a distinct type here so
129 // that the shader generators can switch on it and generate the correct
130 // shader source for testing.
131 case VK_FORMAT_R8_USCALED:
132 return static_cast<deUint32>(sizeof(deInt32));
133 case VK_FORMAT_R8G8_USCALED:
134 return static_cast<deUint32>(sizeof(deInt32) * 2);
135 case VK_FORMAT_R8G8B8_USCALED:
136 case VK_FORMAT_R8G8B8A8_USCALED:
137 return static_cast<deUint32>(sizeof(deInt32) * 4);
// Returns the per-element size of `format` when stored with the given buffer layout.
// For LayoutStd140 the format size is raised to a minimum of 16 bytes; sizes of
// 16 bytes or more are returned unchanged.
// NOTE(review): the return for the other layouts is not visible in this listing —
// presumably the unpadded format size; confirm against the full source.
141 deUint32 getElementSizeInBytes(
142 const VkFormat format,
143 const subgroups::SSBOData::InputDataLayoutType layout)
145 deUint32 bytes = getFormatSizeInBytes(format);
146 if (layout == subgroups::SSBOData::LayoutStd140)
147 return bytes < 16 ? 16 : bytes;
// Creates a render pass with one graphics subpass writing a single color
// attachment of the given format.
//
// The attachment is cleared on load and stored at the end of the pass; it starts
// in VK_IMAGE_LAYOUT_UNDEFINED and ends in VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL.
// Two subpass dependencies are declared: EXTERNAL -> subpass 0 and
// subpass 0 -> EXTERNAL.
152 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
// Attachment 0 is in COLOR_ATTACHMENT_OPTIMAL layout while the subpass runs.
154 VkAttachmentReference colorReference = {
155 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
// One color attachment; no input, resolve, depth/stencil or preserve attachments.
158 const VkSubpassDescription subpassDescription = {0u,
159 VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
160 DE_NULL, DE_NULL, 0, DE_NULL
163 const VkSubpassDependency subpassDependencies[2] = {
// Entry dependency: earlier memory reads -> color attachment access in subpass 0.
164 { VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
165 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
166 VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
167 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
168 VK_DEPENDENCY_BY_REGION_BIT
// Exit dependency: color attachment access in subpass 0 -> later memory reads.
170 { 0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
171 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
172 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
173 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
174 VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
// Single-sampled attachment: color is cleared/stored, stencil ops are don't-care.
178 VkAttachmentDescription attachmentDescription = {0u, format,
179 VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
180 VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
181 VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
182 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
// 1 attachment, 1 subpass, 2 dependencies.
185 const VkRenderPassCreateInfo renderPassCreateInfo = {
186 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
187 &attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
190 return createRenderPass(context.getDeviceInterface(), context.getDevice(),
191 &renderPassCreateInfo);
194 Move<VkPipeline> makeGraphicsPipeline(const DeviceInterface& vk,
195 const VkDevice device,
196 const VkPipelineLayout pipelineLayout,
197 const VkShaderModule vertexShaderModule,
198 const VkShaderModule tessellationControlShaderModule,
199 const VkShaderModule tessellationEvalShaderModule,
200 const VkShaderModule geometryShaderModule,
201 const VkShaderModule fragmentShaderModule,
202 const VkRenderPass renderPass,
203 const std::vector<VkViewport>& viewports,
204 const std::vector<VkRect2D>& scissors,
205 const VkPrimitiveTopology topology,
206 const deUint32 subpass,
207 const deUint32 patchControlPoints,
208 const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo,
209 const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo,
210 const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo,
211 const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo,
212 const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo,
213 const VkPipelineDynamicStateCreateInfo* dynamicStateCreateInfo,
214 const deUint32 vertexShaderStageCreateFlags,
215 const deUint32 tessellationControlShaderStageCreateFlags,
216 const deUint32 tessellationEvalShaderStageCreateFlags,
217 const deUint32 geometryShaderStageCreateFlags,
218 const deUint32 fragmentShaderStageCreateFlags,
219 const deUint32 requiredSubgroupSize[5])
221 const VkBool32 disableRasterization = (fragmentShaderModule == DE_NULL);
222 const bool hasTessellation = (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
224 VkPipelineShaderStageCreateInfo stageCreateInfo =
226 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType
227 DE_NULL, // const void* pNext
228 0u, // VkPipelineShaderStageCreateFlags flags
229 VK_SHADER_STAGE_VERTEX_BIT, // VkShaderStageFlagBits stage
230 DE_NULL, // VkShaderModule module
231 "main", // const char* pName
232 DE_NULL // const VkSpecializationInfo* pSpecializationInfo
235 std::vector<VkPipelineShaderStageCreateInfo> pipelineShaderStageParams;
237 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
240 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
242 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
245 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
247 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
250 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
252 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
255 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
257 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
260 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
262 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
266 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
267 stageCreateInfo.flags = vertexShaderStageCreateFlags;
268 stageCreateInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
269 stageCreateInfo.module = vertexShaderModule;
270 pipelineShaderStageParams.push_back(stageCreateInfo);
273 if (tessellationControlShaderModule != DE_NULL)
275 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
276 stageCreateInfo.flags = tessellationControlShaderStageCreateFlags;
277 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
278 stageCreateInfo.module = tessellationControlShaderModule;
279 pipelineShaderStageParams.push_back(stageCreateInfo);
282 if (tessellationEvalShaderModule != DE_NULL)
284 stageCreateInfo.pNext = (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
285 stageCreateInfo.flags = tessellationEvalShaderStageCreateFlags;
286 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
287 stageCreateInfo.module = tessellationEvalShaderModule;
288 pipelineShaderStageParams.push_back(stageCreateInfo);
291 if (geometryShaderModule != DE_NULL)
293 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
294 stageCreateInfo.flags = geometryShaderStageCreateFlags;
295 stageCreateInfo.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
296 stageCreateInfo.module = geometryShaderModule;
297 pipelineShaderStageParams.push_back(stageCreateInfo);
300 if (fragmentShaderModule != DE_NULL)
302 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
303 stageCreateInfo.flags = fragmentShaderStageCreateFlags;
304 stageCreateInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
305 stageCreateInfo.module = fragmentShaderModule;
306 pipelineShaderStageParams.push_back(stageCreateInfo);
309 const VkVertexInputBindingDescription vertexInputBindingDescription =
311 0u, // deUint32 binding
312 sizeof(tcu::Vec4), // deUint32 stride
313 VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate inputRate
316 const VkVertexInputAttributeDescription vertexInputAttributeDescription =
318 0u, // deUint32 location
319 0u, // deUint32 binding
320 VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format
321 0u // deUint32 offset
324 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfoDefault =
326 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType
327 DE_NULL, // const void* pNext
328 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags
329 1u, // deUint32 vertexBindingDescriptionCount
330 &vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions
331 1u, // deUint32 vertexAttributeDescriptionCount
332 &vertexInputAttributeDescription // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions
335 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
337 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType
338 DE_NULL, // const void* pNext
339 0u, // VkPipelineInputAssemblyStateCreateFlags flags
340 topology, // VkPrimitiveTopology topology
341 VK_FALSE // VkBool32 primitiveRestartEnable
344 const VkPipelineTessellationStateCreateInfo tessStateCreateInfo =
346 VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, // VkStructureType sType
347 DE_NULL, // const void* pNext
348 0u, // VkPipelineTessellationStateCreateFlags flags
349 patchControlPoints // deUint32 patchControlPoints
352 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
354 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
355 DE_NULL, // const void* pNext
356 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
357 viewports.empty() ? 1u : (deUint32)viewports.size(), // deUint32 viewportCount
358 viewports.empty() ? DE_NULL : &viewports[0], // const VkViewport* pViewports
359 viewports.empty() ? 1u : (deUint32)scissors.size(), // deUint32 scissorCount
360 scissors.empty() ? DE_NULL : &scissors[0] // const VkRect2D* pScissors
363 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfoDefault =
365 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType
366 DE_NULL, // const void* pNext
367 0u, // VkPipelineRasterizationStateCreateFlags flags
368 VK_FALSE, // VkBool32 depthClampEnable
369 disableRasterization, // VkBool32 rasterizerDiscardEnable
370 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode
371 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode
372 VK_FRONT_FACE_COUNTER_CLOCKWISE, // VkFrontFace frontFace
373 VK_FALSE, // VkBool32 depthBiasEnable
374 0.0f, // float depthBiasConstantFactor
375 0.0f, // float depthBiasClamp
376 0.0f, // float depthBiasSlopeFactor
377 1.0f // float lineWidth
380 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfoDefault =
382 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
383 DE_NULL, // const void* pNext
384 0u, // VkPipelineMultisampleStateCreateFlags flags
385 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
386 VK_FALSE, // VkBool32 sampleShadingEnable
387 1.0f, // float minSampleShading
388 DE_NULL, // const VkSampleMask* pSampleMask
389 VK_FALSE, // VkBool32 alphaToCoverageEnable
390 VK_FALSE // VkBool32 alphaToOneEnable
393 const VkStencilOpState stencilOpState =
395 VK_STENCIL_OP_KEEP, // VkStencilOp failOp
396 VK_STENCIL_OP_KEEP, // VkStencilOp passOp
397 VK_STENCIL_OP_KEEP, // VkStencilOp depthFailOp
398 VK_COMPARE_OP_NEVER, // VkCompareOp compareOp
399 0, // deUint32 compareMask
400 0, // deUint32 writeMask
401 0 // deUint32 reference
404 const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfoDefault =
406 VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType
407 DE_NULL, // const void* pNext
408 0u, // VkPipelineDepthStencilStateCreateFlags flags
409 VK_FALSE, // VkBool32 depthTestEnable
410 VK_FALSE, // VkBool32 depthWriteEnable
411 VK_COMPARE_OP_LESS_OR_EQUAL, // VkCompareOp depthCompareOp
412 VK_FALSE, // VkBool32 depthBoundsTestEnable
413 VK_FALSE, // VkBool32 stencilTestEnable
414 stencilOpState, // VkStencilOpState front
415 stencilOpState, // VkStencilOpState back
416 0.0f, // float minDepthBounds
417 1.0f, // float maxDepthBounds
420 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
422 VK_FALSE, // VkBool32 blendEnable
423 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcColorBlendFactor
424 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor
425 VK_BLEND_OP_ADD, // VkBlendOp colorBlendOp
426 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcAlphaBlendFactor
427 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor
428 VK_BLEND_OP_ADD, // VkBlendOp alphaBlendOp
429 VK_COLOR_COMPONENT_R_BIT // VkColorComponentFlags colorWriteMask
430 | VK_COLOR_COMPONENT_G_BIT
431 | VK_COLOR_COMPONENT_B_BIT
432 | VK_COLOR_COMPONENT_A_BIT
435 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfoDefault =
437 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType
438 DE_NULL, // const void* pNext
439 0u, // VkPipelineColorBlendStateCreateFlags flags
440 VK_FALSE, // VkBool32 logicOpEnable
441 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp
442 1u, // deUint32 attachmentCount
443 &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments
444 { 0.0f, 0.0f, 0.0f, 0.0f } // float blendConstants[4]
447 std::vector<VkDynamicState> dynamicStates;
449 if (viewports.empty())
450 dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
451 if (scissors.empty())
452 dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
454 const VkPipelineDynamicStateCreateInfo dynamicStateCreateInfoDefault =
456 VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType sType
457 DE_NULL, // const void* pNext
458 0u, // VkPipelineDynamicStateCreateFlags flags
459 (deUint32)dynamicStates.size(), // deUint32 dynamicStateCount
460 dynamicStates.empty() ? DE_NULL : &dynamicStates[0] // const VkDynamicState* pDynamicStates
463 const VkPipelineDynamicStateCreateInfo* dynamicStateCreateInfoDefaultPtr = dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
465 const VkGraphicsPipelineCreateInfo pipelineCreateInfo =
467 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType
468 DE_NULL, // const void* pNext
469 0u, // VkPipelineCreateFlags flags
470 (deUint32)pipelineShaderStageParams.size(), // deUint32 stageCount
471 &pipelineShaderStageParams[0], // const VkPipelineShaderStageCreateInfo* pStages
472 vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState
473 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState
474 hasTessellation ? &tessStateCreateInfo : DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState
475 &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState
476 rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState
477 multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState
478 depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState
479 colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState
480 dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr, // const VkPipelineDynamicStateCreateInfo* pDynamicState
481 pipelineLayout, // VkPipelineLayout layout
482 renderPass, // VkRenderPass renderPass
483 subpass, // deUint32 subpass
484 DE_NULL, // VkPipeline basePipelineHandle
485 0 // deInt32 basePipelineIndex;
488 return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
// Convenience overload used by the subgroup tests: fills in the vertex-input and
// color-blend state from its arguments and forwards everything else to the
// DeviceInterface-based makeGraphicsPipeline overload.
//
// - Viewports and scissors are passed as empty vectors, which that overload
//   turns into dynamic viewport/scissor state.
// - The color write mask covers as many channels as `attachmentFormat` uses.
// - `stages` and `frameBufferTests` are only consulted (in the visible lines) to
//   choose the patch control point count.
// - `requiredSubgroupSize` may be DE_NULL and is forwarded unchanged.
491 Move<VkPipeline> makeGraphicsPipeline(Context& context,
492 const VkPipelineLayout pipelineLayout,
493 const VkShaderStageFlags stages,
494 const VkShaderModule vertexShaderModule,
495 const VkShaderModule fragmentShaderModule,
496 const VkShaderModule geometryShaderModule,
497 const VkShaderModule tessellationControlModule,
498 const VkShaderModule tessellationEvaluationModule,
499 const VkRenderPass renderPass,
500 const VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
501 const VkVertexInputBindingDescription* vertexInputBindingDescription = DE_NULL,
502 const VkVertexInputAttributeDescription* vertexInputAttributeDescriptions = DE_NULL,
503 const bool frameBufferTests = false,
504 const vk::VkFormat attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
505 const deUint32 vertexShaderStageCreateFlags = 0u,
506 const deUint32 tessellationControlShaderStageCreateFlags = 0u,
507 const deUint32 tessellationEvalShaderStageCreateFlags = 0u,
508 const deUint32 geometryShaderStageCreateFlags = 0u,
509 const deUint32 fragmentShaderStageCreateFlags = 0u,
510 const deUint32 requiredSubgroupSize[5] = DE_NULL)
// Intentionally empty: selects dynamic viewport and scissor in the callee.
512 std::vector<VkViewport> noViewports;
513 std::vector<VkRect2D> noScissors;
// At most one binding and one attribute description are consumed, even though
// the attribute parameter name is plural.
515 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
517 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
518 DE_NULL, // const void* pNext;
519 0u, // VkPipelineVertexInputStateCreateFlags flags;
520 vertexInputBindingDescription == DE_NULL ? 0u : 1u, // deUint32 vertexBindingDescriptionCount;
521 vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
522 vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u, // deUint32 vertexAttributeDescriptionCount;
523 vertexInputAttributeDescriptions, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
// Enable writes only for the channels present in the attachment format
// (1 -> R, 2 -> RG, 3 -> RGB, otherwise RGBA).
526 const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
527 const VkColorComponentFlags colorComponent =
528 numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
529 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
530 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
531 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
// Blending disabled.
// NOTE(review): the final colorWriteMask initializer is not visible in this
// listing — presumably `colorComponent`; confirm against the full source.
533 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
535 VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
536 VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
540 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
542 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
543 VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
544 { 0.0f, 0.0f, 0.0f, 0.0f }
// `&` binds tighter than `&&`: 2 control points only when the fragment bit is
// set in `stages` AND frameBufferTests is true, otherwise 1.
547 const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
// Note the argument reordering: this overload takes fragment/geometry/tess
// modules in a different order than the callee expects.
549 return makeGraphicsPipeline(context.getDeviceInterface(), // const DeviceInterface& vk
550 context.getDevice(), // const VkDevice device
551 pipelineLayout, // const VkPipelineLayout pipelineLayout
552 vertexShaderModule, // const VkShaderModule vertexShaderModule
553 tessellationControlModule, // const VkShaderModule tessellationControlShaderModule
554 tessellationEvaluationModule, // const VkShaderModule tessellationEvalShaderModule
555 geometryShaderModule, // const VkShaderModule geometryShaderModule
556 fragmentShaderModule, // const VkShaderModule fragmentShaderModule
557 renderPass, // const VkRenderPass renderPass
558 noViewports, // const std::vector<VkViewport>& viewports
559 noScissors, // const std::vector<VkRect2D>& scissors
560 topology, // const VkPrimitiveTopology topology
561 0u, // const deUint32 subpass
562 patchControlPoints, // const deUint32 patchControlPoints
563 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
564 DE_NULL, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
565 DE_NULL, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
566 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
567 &colorBlendStateCreateInfo, // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
568 DE_NULL, // const VkPipelineDynamicStateCreateInfo*
569 vertexShaderStageCreateFlags, // const deUint32 vertexShaderStageCreateFlags,
570 tessellationControlShaderStageCreateFlags, // const deUint32 tessellationControlShaderStageCreateFlags
571 tessellationEvalShaderStageCreateFlags, // const deUint32 tessellationEvalShaderStageCreateFlags
572 geometryShaderStageCreateFlags, // const deUint32 geometryShaderStageCreateFlags
573 fragmentShaderStageCreateFlags, // const deUint32 fragmentShaderStageCreateFlags
574 requiredSubgroupSize); // const deUint32 requiredSubgroupSize[5]
// Allocates one primary-level command buffer from `commandPool` on the
// context's device and returns it as an owning Move<> handle.
577 Move<VkCommandBuffer> makeCommandBuffer(
578 Context& context, const VkCommandPool commandPool)
580 const VkCommandBufferAllocateInfo bufferAllocateParams =
582 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
583 DE_NULL, // const void* pNext;
584 commandPool, // VkCommandPool commandPool;
585 VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
586 1u, // deUint32 bufferCount;
588 return allocateCommandBuffer(context.getDeviceInterface(),
589 context.getDevice(), &bufferAllocateParams);
// Members of the BufferOrImage base type: a resource that is either a Buffer or
// an Image, tagged by m_isImage and owning its memory allocation.
// NOTE(review): the struct header and some member lines are not visible in this
// listing.

// Returns this object viewed as a Buffer; DE_FATAL fires if it is tagged as an image.
// NOTE(review): the two DE_FATAL messages look swapped relative to the checks
// they guard (this one fires when an image is requested as a buffer) — confirm.
602 Buffer* getAsBuffer()
604 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
605 return reinterpret_cast<Buffer* >(this);
// Counterpart accessor: DE_FATAL fires if the object is not tagged as an image.
610 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
611 return reinterpret_cast<Image*>(this);
// Descriptor type for this resource: storage image or storage buffer.
// NOTE(review): the condition selecting between the two returns is not visible
// here — presumably m_isImage.
614 virtual VkDescriptorType getType() const
618 return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
622 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
// Backing memory allocation; dereferences m_allocation without a null check.
626 Allocation& getAllocation() const
628 return *m_allocation;
631 virtual ~BufferOrImage() {}
// `image` is true for Image subclasses and false for Buffer subclasses.
634 explicit BufferOrImage(bool image) : m_isImage(image) {}
// Set by the derived constructors after the resource has been created.
637 de::details::MovePtr<Allocation> m_allocation;
// Buffer resource backed by host-visible memory.
// The constructor creates the VkBuffer, allocates HostVisible memory that
// satisfies its requirements and binds the two together.
// NOTE(review): several lines (constructor name, the m_usage initializer, most
// VkBufferCreateInfo fields, the accessor bodies) are not visible in this listing.
640 struct Buffer : public BufferOrImage
// `usage` defaults to a storage buffer.
643 Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
644 : BufferOrImage (false)
645 , m_sizeInBytes (sizeInBytes)
648 const DeviceInterface& vkd = context.getDeviceInterface();
649 const VkDevice device = context.getDevice();
651 const vk::VkBufferCreateInfo bufferCreateInfo =
653 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
658 VK_SHARING_MODE_EXCLUSIVE,
662 m_buffer = createBuffer(vkd, device, &bufferCreateInfo);
664 VkMemoryRequirements req = getBufferMemoryRequirements(vkd, device, *m_buffer);
// Host-visible so the test can read and write the contents from the CPU side.
666 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
667 VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
// Reports UNIFORM_BUFFER only when m_usage is exactly the uniform-buffer bit;
// any other value yields STORAGE_BUFFER.
// NOTE(review): this is an equality test, not a bit test — a usage mask that
// combines the uniform bit with other bits is reported as a storage buffer.
670 virtual VkDescriptorType getType() const
672 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
674 return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
676 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
679 VkBuffer getBuffer () const
684 const VkBuffer* getBufferPtr () const
// Size requested at construction, not the (possibly larger) allocation size.
689 VkDeviceSize getSize () const
691 return m_sizeInBytes;
695 Move<VkBuffer> m_buffer;
696 VkDeviceSize m_sizeInBytes;
697 const VkBufferUsageFlags m_usage;
// 2D image resource with an image view and a sampler.
// The constructor creates a single-mip, single-layer, single-sampled,
// optimally tiled image, binds freshly allocated memory to it, creates the view
// and sampler, and then submits a one-off command buffer that transitions the
// image from UNDEFINED to GENERAL layout, waiting for it to complete.
// NOTE(review): several sampler and image-view fields and the accessor bodies
// are not visible in this listing.
700 struct Image : public BufferOrImage
// `usage` defaults to storage-image usage.
702 explicit Image(Context& context, deUint32 width, deUint32 height,
703 VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
704 : BufferOrImage(true)
706 const DeviceInterface& vk = context.getDeviceInterface();
707 const VkDevice device = context.getDevice();
708 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
// width x height x 1, one mip level, one array layer, exclusive sharing.
710 const VkImageCreateInfo imageCreateInfo =
712 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
713 format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
714 VK_IMAGE_TILING_OPTIMAL, usage,
715 VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
716 VK_IMAGE_LAYOUT_UNDEFINED
719 const VkComponentMapping componentMapping =
721 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
722 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
// Whole image: color aspect, mip 0, layer 0.
725 const VkImageSubresourceRange subresourceRange =
727 VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
728 0u, //deUint32 baseMipLevel
729 1u, //deUint32 levelCount
730 0u, //deUint32 baseArrayLayer
731 1u //deUint32 layerCount
// Nearest mipmap mode, clamp-to-edge addressing on all three axes.
734 const VkSamplerCreateInfo samplerCreateInfo =
736 VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
741 VK_SAMPLER_MIPMAP_MODE_NEAREST,
742 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
743 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
744 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
749 VK_COMPARE_OP_ALWAYS,
752 VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
756 m_image = createImage(vk, device, &imageCreateInfo);
758 VkMemoryRequirements req = getImageMemoryRequirements(vk, device, *m_image);
// No particular memory property is requested for image memory.
761 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
763 VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
// The view reuses the image's format.
765 const VkImageViewCreateInfo imageViewCreateInfo =
767 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
768 VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
772 m_imageView = createImageView(vk, device, &imageViewCreateInfo);
773 m_sampler = createSampler(vk, device, &samplerCreateInfo);
775 // Transition input image layouts
// A temporary pool and command buffer are used; both are released when the
// Unique<> handles go out of scope.
777 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
778 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
780 beginCommandBuffer(vk, *cmdBuffer);
// UNDEFINED -> GENERAL, making the image available to later transfer writes.
782 const VkImageMemoryBarrier imageBarrier = makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
783 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
785 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
786 (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);
788 endCommandBuffer(vk, *cmdBuffer);
// Blocks until the layout transition has executed.
789 submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
793 VkImage getImage () const
798 VkImageView getImageView () const
803 VkSampler getSampler () const
809 Move<VkImage> m_image;
810 Move<VkImageView> m_imageView;
811 Move<VkSampler> m_sampler;
// Returns GLSL source for a compute-shader helper, `uvec4 sharedMemoryBallot(bool vote)`,
// that builds a ballot mask in shared memory.
//
// The generated code declares a shared uvec4 array with one entry per workgroup
// invocation and uses the entry indexed by gl_SubgroupID. One elected invocation
// zeroes that entry, invocations then atomically OR their bit into the x/y/z/w
// component selected by gl_SubgroupInvocationID / 32, and the entry is returned.
// subgroupMemoryBarrierShared() is issued between these steps.
// NOTE(review): the lines between the visible string fragments (braces and,
// presumably, the test of `vote`) are not visible in this listing.
815 std::string vkt::subgroups::getSharedMemoryBallotHelper()
817 return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
818 "uvec4 sharedMemoryBallot(bool vote)\n"
820 " uint groupOffset = gl_SubgroupID;\n"
821 " // One invocation in the group 0's the whole group's data\n"
822 " if (subgroupElect())\n"
824 " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
826 " subgroupMemoryBarrierShared();\n"
829 " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
830 " const highp uint bitToSet = 1u << invocationId;\n"
831 " switch (gl_SubgroupInvocationID / 32)\n"
833 " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
834 " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
835 " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
836 " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
839 " subgroupMemoryBarrierShared();\n"
840 " return superSecretComputeShaderHelper[groupOffset];\n"
// Variant of the shared-memory ballot helper whose generated GLSL function
// returns a uint64_t instead of a uvec4.
//
// The shared-memory bookkeeping is the same (per-subgroup uvec4 entry, elected
// invocation clears it, each invocation ORs its bit into x/y/z/w), but only the
// .xy components are packed into the result via packUint2x32, so bits recorded
// for invocation indices 64 and above are not part of the returned value.
// NOTE(review): the lines between the visible string fragments (braces and,
// presumably, the test of `vote`) are not visible in this listing.
844 std::string vkt::subgroups::getSharedMemoryBallotHelperARB()
846 return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
847 "uint64_t sharedMemoryBallot(bool vote)\n"
849 " uint groupOffset = gl_SubgroupID;\n"
850 " // One invocation in the group 0's the whole group's data\n"
851 " if (subgroupElect())\n"
853 " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
855 " subgroupMemoryBarrierShared();\n"
858 " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
859 " const highp uint bitToSet = 1u << invocationId;\n"
860 " switch (gl_SubgroupInvocationID / 32)\n"
862 " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
863 " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
864 " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
865 " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
868 " subgroupMemoryBarrierShared();\n"
869 " return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
873 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
875 VkPhysicalDeviceSubgroupProperties subgroupProperties;
876 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
877 subgroupProperties.pNext = DE_NULL;
879 VkPhysicalDeviceProperties2 properties;
880 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
881 properties.pNext = &subgroupProperties;
883 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
885 return subgroupProperties.subgroupSize;
// Upper bound on the subgroup size assumed by these tests. Elsewhere in this
// file the value sizes the `data[...]` uniform array in generated shaders.
// NOTE(review): the function body is not visible in this excerpt; an earlier
// comment in the file mentions 128 as the max subgroup size - confirm the value.
888 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
// Maps a single shader stage bit to a short name string.
// Stages with a case label: compute, fragment, vertex, geometry, tessellation
// control and tessellation evaluation. Any other value reaches
// DE_FATAL("Unhandled stage!").
// NOTE(review): only the "tess_control" name is visible in this excerpt; the
// other returned strings should be confirmed against the full file.
892 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
897 DE_FATAL("Unhandled stage!");
899 case VK_SHADER_STAGE_COMPUTE_BIT:
901 case VK_SHADER_STAGE_FRAGMENT_BIT:
903 case VK_SHADER_STAGE_VERTEX_BIT:
905 case VK_SHADER_STAGE_GEOMETRY_BIT:
907 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
908 return "tess_control";
909 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
914 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
919 DE_FATAL("Unknown subgroup feature category!");
921 case VK_SUBGROUP_FEATURE_BASIC_BIT:
922 return "VK_SUBGROUP_FEATURE_BASIC_BIT";
923 case VK_SUBGROUP_FEATURE_VOTE_BIT:
924 return "VK_SUBGROUP_FEATURE_VOTE_BIT";
925 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
926 return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
927 case VK_SUBGROUP_FEATURE_BALLOT_BIT:
928 return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
929 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
930 return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
931 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
932 return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
933 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
934 return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
935 case VK_SUBGROUP_FEATURE_QUAD_BIT:
936 return "VK_SUBGROUP_FEATURE_QUAD_BIT";
// Registers three SPIR-V assembly shaders in programCollection.spirvAsmSources
// under the names "vert_noSubgroup", "tesc_noSubgroup" and "tese_noSubgroup".
// None of the visible SPIR-V declares a subgroup capability (only Shader or
// Tessellation).
// NOTE(review): the quoted GLSL lines that precede each assembly string look
// like the reference source the SPIR-V was generated from (presumably kept in
// a comment in the full file) - confirm.
940 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
947 " float pixelSize = 2.0f/1024.0f;\n"
948 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
949 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
950 " gl_PointSize = 1.0f;\n"
// Vertex stage: entry point "main" with a gl_PerVertex output block (%22) and
// the VertexIndex built-in input (%26).
953 const std::string vertNoSubgroup =
956 "; Generator: Khronos Glslang Reference Front End; 1\n"
959 "OpCapability Shader\n"
960 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
961 "OpMemoryModel Logical GLSL450\n"
962 "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
963 "OpMemberDecorate %20 0 BuiltIn Position\n"
964 "OpMemberDecorate %20 1 BuiltIn PointSize\n"
965 "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
966 "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
967 "OpDecorate %20 Block\n"
968 "OpDecorate %26 BuiltIn VertexIndex\n"
970 "%3 = OpTypeFunction %2\n"
971 "%6 = OpTypeFloat 32\n"
972 "%7 = OpTypePointer Function %6\n"
973 "%9 = OpConstant %6 0.00195313\n"
974 "%12 = OpConstant %6 2\n"
975 "%14 = OpConstant %6 1\n"
976 "%16 = OpTypeVector %6 4\n"
977 "%17 = OpTypeInt 32 0\n"
978 "%18 = OpConstant %17 1\n"
979 "%19 = OpTypeArray %6 %18\n"
980 "%20 = OpTypeStruct %16 %6 %19 %19\n"
981 "%21 = OpTypePointer Output %20\n"
982 "%22 = OpVariable %21 Output\n"
983 "%23 = OpTypeInt 32 1\n"
984 "%24 = OpConstant %23 0\n"
985 "%25 = OpTypePointer Input %23\n"
986 "%26 = OpVariable %25 Input\n"
987 "%33 = OpConstant %6 0\n"
988 "%35 = OpTypePointer Output %16\n"
989 "%37 = OpConstant %23 1\n"
990 "%38 = OpTypePointer Output %6\n"
991 "%4 = OpFunction %2 None %3\n"
993 "%8 = OpVariable %7 Function\n"
994 "%10 = OpVariable %7 Function\n"
996 "%11 = OpLoad %6 %8\n"
997 "%13 = OpFDiv %6 %11 %12\n"
998 "%15 = OpFSub %6 %13 %14\n"
1000 "%27 = OpLoad %23 %26\n"
1001 "%28 = OpConvertSToF %6 %27\n"
1002 "%29 = OpLoad %6 %8\n"
1003 "%30 = OpFMul %6 %28 %29\n"
1004 "%31 = OpLoad %6 %10\n"
1005 "%32 = OpFAdd %6 %30 %31\n"
1006 "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
1007 "%36 = OpAccessChain %35 %22 %24\n"
1009 "%39 = OpAccessChain %38 %22 %37\n"
1013 programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
1019 "layout(vertices=1) out;\n"
1021 "void main (void)\n"
1023 " if (gl_InvocationID == 0)\n"
1025 " gl_TessLevelOuter[0] = 1.0f;\n"
1026 " gl_TessLevelOuter[1] = 1.0f;\n"
1028 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
// Tessellation control stage: one output vertex per patch (OutputVertices 1),
// with InvocationId and TessLevelOuter built-ins.
1031 const std::string tescNoSubgroup =
1034 "; Generator: Khronos Glslang Reference Front End; 1\n"
1037 "OpCapability Tessellation\n"
1038 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1039 "OpMemoryModel Logical GLSL450\n"
1040 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
1041 "OpExecutionMode %4 OutputVertices 1\n"
1042 "OpDecorate %8 BuiltIn InvocationId\n"
1043 "OpDecorate %20 Patch\n"
1044 "OpDecorate %20 BuiltIn TessLevelOuter\n"
1045 "OpMemberDecorate %29 0 BuiltIn Position\n"
1046 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1047 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1048 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1049 "OpDecorate %29 Block\n"
1050 "OpMemberDecorate %34 0 BuiltIn Position\n"
1051 "OpMemberDecorate %34 1 BuiltIn PointSize\n"
1052 "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
1053 "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
1054 "OpDecorate %34 Block\n"
1056 "%3 = OpTypeFunction %2\n"
1057 "%6 = OpTypeInt 32 1\n"
1058 "%7 = OpTypePointer Input %6\n"
1059 "%8 = OpVariable %7 Input\n"
1060 "%10 = OpConstant %6 0\n"
1061 "%11 = OpTypeBool\n"
1062 "%15 = OpTypeFloat 32\n"
1063 "%16 = OpTypeInt 32 0\n"
1064 "%17 = OpConstant %16 4\n"
1065 "%18 = OpTypeArray %15 %17\n"
1066 "%19 = OpTypePointer Output %18\n"
1067 "%20 = OpVariable %19 Output\n"
1068 "%21 = OpConstant %15 1\n"
1069 "%22 = OpTypePointer Output %15\n"
1070 "%24 = OpConstant %6 1\n"
1071 "%26 = OpTypeVector %15 4\n"
1072 "%27 = OpConstant %16 1\n"
1073 "%28 = OpTypeArray %15 %27\n"
1074 "%29 = OpTypeStruct %26 %15 %28 %28\n"
1075 "%30 = OpTypeArray %29 %27\n"
1076 "%31 = OpTypePointer Output %30\n"
1077 "%32 = OpVariable %31 Output\n"
1078 "%34 = OpTypeStruct %26 %15 %28 %28\n"
1079 "%35 = OpConstant %16 32\n"
1080 "%36 = OpTypeArray %34 %35\n"
1081 "%37 = OpTypePointer Input %36\n"
1082 "%38 = OpVariable %37 Input\n"
1083 "%40 = OpTypePointer Input %26\n"
1084 "%43 = OpTypePointer Output %26\n"
1085 "%4 = OpFunction %2 None %3\n"
1087 "%9 = OpLoad %6 %8\n"
1088 "%12 = OpIEqual %11 %9 %10\n"
1089 "OpSelectionMerge %14 None\n"
1090 "OpBranchConditional %12 %13 %14\n"
1092 "%23 = OpAccessChain %22 %20 %10\n"
1094 "%25 = OpAccessChain %22 %20 %24\n"
1098 "%33 = OpLoad %6 %8\n"
1099 "%39 = OpLoad %6 %8\n"
1100 "%41 = OpAccessChain %40 %38 %39 %10\n"
1101 "%42 = OpLoad %26 %41\n"
1102 "%44 = OpAccessChain %43 %32 %33 %10\n"
1106 programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
1112 "layout(isolines) in;\n"
1114 "void main (void)\n"
1116 " float pixelSize = 2.0f/1024.0f;\n"
1117 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
// Tessellation evaluation stage: isolines, equal spacing, CCW vertex order,
// using the TessCoord built-in.
1120 const std::string teseNoSubgroup =
1123 "; Generator: Khronos Glslang Reference Front End; 2\n"
1126 "OpCapability Tessellation\n"
1127 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1128 "OpMemoryModel Logical GLSL450\n"
1129 "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
1130 "OpExecutionMode %4 Isolines\n"
1131 "OpExecutionMode %4 SpacingEqual\n"
1132 "OpExecutionMode %4 VertexOrderCcw\n"
1133 "OpMemberDecorate %14 0 BuiltIn Position\n"
1134 "OpMemberDecorate %14 1 BuiltIn PointSize\n"
1135 "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
1136 "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
1137 "OpDecorate %14 Block\n"
1138 "OpMemberDecorate %19 0 BuiltIn Position\n"
1139 "OpMemberDecorate %19 1 BuiltIn PointSize\n"
1140 "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
1141 "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
1142 "OpDecorate %19 Block\n"
1143 "OpDecorate %29 BuiltIn TessCoord\n"
1145 "%3 = OpTypeFunction %2\n"
1146 "%6 = OpTypeFloat 32\n"
1147 "%7 = OpTypePointer Function %6\n"
1148 "%9 = OpConstant %6 0.00195313\n"
1149 "%10 = OpTypeVector %6 4\n"
1150 "%11 = OpTypeInt 32 0\n"
1151 "%12 = OpConstant %11 1\n"
1152 "%13 = OpTypeArray %6 %12\n"
1153 "%14 = OpTypeStruct %10 %6 %13 %13\n"
1154 "%15 = OpTypePointer Output %14\n"
1155 "%16 = OpVariable %15 Output\n"
1156 "%17 = OpTypeInt 32 1\n"
1157 "%18 = OpConstant %17 0\n"
1158 "%19 = OpTypeStruct %10 %6 %13 %13\n"
1159 "%20 = OpConstant %11 32\n"
1160 "%21 = OpTypeArray %19 %20\n"
1161 "%22 = OpTypePointer Input %21\n"
1162 "%23 = OpVariable %22 Input\n"
1163 "%24 = OpTypePointer Input %10\n"
1164 "%27 = OpTypeVector %6 3\n"
1165 "%28 = OpTypePointer Input %27\n"
1166 "%29 = OpVariable %28 Input\n"
1167 "%30 = OpConstant %11 0\n"
1168 "%31 = OpTypePointer Input %6\n"
1169 "%36 = OpConstant %6 2\n"
1170 "%40 = OpTypePointer Output %10\n"
1171 "%4 = OpFunction %2 None %3\n"
1173 "%8 = OpVariable %7 Function\n"
1175 "%25 = OpAccessChain %24 %23 %18 %18\n"
1176 "%26 = OpLoad %10 %25\n"
1177 "%32 = OpAccessChain %31 %29 %30\n"
1178 "%33 = OpLoad %6 %32\n"
1179 "%34 = OpLoad %6 %8\n"
1180 "%35 = OpFMul %6 %33 %34\n"
1181 "%37 = OpFDiv %6 %35 %36\n"
1182 "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
1183 "%39 = OpFAdd %10 %26 %38\n"
1184 "%41 = OpAccessChain %40 %16 %18\n"
1188 programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
// Returns vertex shader source text selected by `stage`.
// Case labels exist for the fragment, geometry and both tessellation stages
// (the two tessellation stages share one branch); any other value reaches
// DE_FATAL("Unhandled stage!").
// In the fragment branch the visible shader derives gl_Position.x from
// gl_VertexIndex using a pixel size of 2/1024.
// NOTE(review): most of the returned shader text is not visible in this
// excerpt - confirm the bodies of the geometry and tessellation variants.
1194 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
1199 DE_FATAL("Unhandled stage!");
1201 case VK_SHADER_STAGE_FRAGMENT_BIT:
1204 "void main (void)\n"
1206 " float pixelSize = 2.0f/1024.0f;\n"
1207 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1208 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1210 case VK_SHADER_STAGE_GEOMETRY_BIT:
1213 "void main (void)\n"
1216 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1217 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1220 "void main (void)\n"
// Adds the shader programs for a framebuffer-based subgroup test that
// exercises a single graphics stage.
//
// programCollection - collection that receives the generated sources.
// buildOptions      - build options attached to every generated GLSL source.
// shaderStage       - stage under test: vertex, geometry, tessellation control
//                     or tessellation evaluation; anything else hits DE_FATAL.
// extHeader         - text emitted right after the #version line.
// helperStr         - text emitted just before main().
// The body also reads `format` (GLSL element type of the data[] uniform array)
// and `gsPointSize` (whether the geometry shader forwards gl_PointSize).
// NOTE(review): the parameter lines declaring `format` and `gsPointSize`, and
// the stream pieces that presumably insert `testSrc` into main() to compute
// `tempRes`, are not visible in this excerpt - confirm.
1226 void vkt::subgroups::initStdFrameBufferPrograms( SourceCollections& programCollection,
1227 const vk::ShaderBuildOptions& buildOptions,
1228 VkShaderStageFlags shaderStage,
1231 std::string extHeader,
1232 std::string testSrc,
1233 std::string helperStr)
// The fragment shader is always the shared framebuffer one.
1235 subgroups::setFragmentShaderFrameBuffer(programCollection);
// Every stage except the vertex stage uses the shared framebuffer vertex shader.
1237 if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1238 subgroups::setVertexShaderFrameBuffer(programCollection);
// Vertex stage under test: source registered as "vert"; writes float(tempRes) to `result`.
1240 if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1242 std::ostringstream vertex;
1243 vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1244 << extHeader.c_str()
1245 << "layout(location = 0) in highp vec4 in_position;\n"
1246 << "layout(location = 0) out float result;\n"
1247 << "layout(set = 0, binding = 0) uniform Buffer1\n"
1249 << " " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1252 << helperStr.c_str()
1253 << "void main (void)\n"
1255 << " uint tempRes;\n"
1257 << " result = float(tempRes);\n"
1258 << " gl_Position = in_position;\n"
1259 << " gl_PointSize = 1.0f;\n"
1261 programCollection.glslSources.add("vert")
1262 << glu::VertexSource(vertex.str()) << buildOptions;
// Geometry stage under test: points in, one point out; source registered as "geometry".
1264 else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1266 std::ostringstream geometry;
1268 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1269 << extHeader.c_str()
1270 << "layout(points) in;\n"
1271 << "layout(points, max_vertices = 1) out;\n"
1272 << "layout(location = 0) out float out_color;\n"
1273 << "layout(set = 0, binding = 0) uniform Buffer1\n"
1275 << " " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1278 << helperStr.c_str()
1279 << "void main (void)\n"
1281 << " uint tempRes;\n"
1283 << " out_color = float(tempRes);\n"
1284 << " gl_Position = gl_in[0].gl_Position;\n"
1285 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1286 << " EmitVertex();\n"
1287 << " EndPrimitive();\n"
1290 programCollection.glslSources.add("geometry")
1291 << glu::GeometrySource(geometry.str()) << buildOptions;
// Tessellation control under test: two output vertices per patch; source
// registered as "tesc" and paired with the shared framebuffer evaluation shader.
1293 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1295 std::ostringstream controlSource;
1296 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1297 << extHeader.c_str()
1298 << "layout(vertices = 2) out;\n"
1299 << "layout(location = 0) out float out_color[];\n"
1300 << "layout(set = 0, binding = 0) uniform Buffer1\n"
1302 << " " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1305 << helperStr.c_str()
1306 << "void main (void)\n"
1308 << " if (gl_InvocationID == 0)\n"
1310 << " gl_TessLevelOuter[0] = 1.0f;\n"
1311 << " gl_TessLevelOuter[1] = 1.0f;\n"
1313 << " uint tempRes;\n"
1315 << " out_color[gl_InvocationID] = float(tempRes);\n"
1316 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1319 programCollection.glslSources.add("tesc")
1320 << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1321 subgroups::setTesEvalShaderFrameBuffer(programCollection);
// Tessellation evaluation under test: isolines; source registered as "tese"
// and paired with the shared framebuffer control shader.
1323 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1325 ostringstream evaluationSource;
1326 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1327 << extHeader.c_str()
1328 << "layout(isolines, equal_spacing, ccw ) in;\n"
1329 << "layout(location = 0) out float out_color;\n"
1330 << "layout(set = 0, binding = 0) uniform Buffer1\n"
1332 << " " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1335 << helperStr.c_str()
1336 << "void main (void)\n"
1338 << " uint tempRes;\n"
1340 << " out_color = float(tempRes);\n"
1341 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1344 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1345 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1349 DE_FATAL("Unsupported shader stage");
// Adds the shader programs for a buffer-based (SSBO) subgroup test.
//
// programCollection - collection that receives the generated sources.
// buildOptions      - build options attached to every generated GLSL source.
// shaderStage       - VK_SHADER_STAGE_COMPUTE_BIT selects the compute path;
//                     the other path builds the full graphics set.
// format            - selects the GLSL element type of the data[] buffer.
// extHeader         - text emitted after the #version line.
// helperStr         - text emitted just before main().
// NOTE(review): the pieces that presumably insert `testSrc` into main() to
// compute `tempRes` are not visible in this excerpt - confirm.
1353 void vkt::subgroups::initStdPrograms( vk::SourceCollections& programCollection,
1354 const vk::ShaderBuildOptions& buildOptions,
1355 vk::VkShaderStageFlags shaderStage,
1356 vk::VkFormat format,
1357 std::string extHeader,
1358 std::string testSrc,
1359 std::string helperStr)
// Compute path: workgroup size comes from specialization constants 0..2,
// results go to binding 0 and input data is read from binding 1.
1361 if (shaderStage == VK_SHADER_STAGE_COMPUTE_BIT)
1363 std::ostringstream src;
1365 src << "#version 450\n"
1366 << extHeader.c_str()
1367 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1368 "local_size_z_id = 2) in;\n"
1369 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1371 << " uint result[];\n"
1373 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
1375 << " " << subgroups::getFormatNameForGLSL(format) << " data[];\n"
1378 << helperStr.c_str()
1379 << "void main (void)\n"
1381 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1382 << " highp uint offset = globalSize.x * ((globalSize.y * "
1383 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1384 "gl_GlobalInvocationID.x;\n"
1385 << " uint tempRes;\n"
1387 << " result[offset] = tempRes;\n"
1390 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
// Graphics path: result buffers use bindings 0 (vertex), 1 (tess control),
// 2 (tess evaluation) and 3 (geometry); all stages read input from binding 4.
1394 const string vertex =
1397 "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1401 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1403 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1407 "void main (void)\n"
1411 " result[gl_VertexIndex] = tempRes;\n"
1412 " float pixelSize = 2.0f/1024.0f;\n"
1413 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1414 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1415 " gl_PointSize = 1.0f;\n"
1421 "layout(vertices=1) out;\n"
1422 "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
1426 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1428 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1432 "void main (void)\n"
1436 " result[gl_PrimitiveID] = tempRes;\n"
1437 " if (gl_InvocationID == 0)\n"
1439 " gl_TessLevelOuter[0] = 1.0f;\n"
1440 " gl_TessLevelOuter[1] = 1.0f;\n"
1442 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1448 "layout(isolines) in;\n"
1449 "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
1453 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1455 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1459 "void main (void)\n"
1463 " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1464 " float pixelSize = 2.0f/1024.0f;\n"
1465 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
// Geometry source is a template: ${TOPOLOGY} is left for
// addGeometryShadersFromTemplate to fill in.
1468 const string geometry =
1471 "layout(${TOPOLOGY}) in;\n"
1472 "layout(points, max_vertices = 1) out;\n"
1473 "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
1477 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1479 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1483 "void main (void)\n"
1487 " result[gl_PrimitiveIDIn] = tempRes;\n"
1488 " gl_Position = gl_in[0].gl_Position;\n"
1490 " EndPrimitive();\n"
// The fragment shader reports its result through colour output 0 instead of a buffer.
1493 const string fragment =
1496 "layout(location = 0) out uint result;\n"
1497 "layout(set = 0, binding = 4, std430) readonly buffer Buffer1\n"
1499 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1502 "void main (void)\n"
1506 " result = tempRes;\n"
// Register the subgroup-free variants first, then every generated stage.
1509 subgroups::addNoSubgroupShader(programCollection);
1511 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1512 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1513 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1514 subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1515 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
1519 bool vkt::subgroups::isSubgroupSupported(Context& context)
1521 return context.contextSupports(vk::ApiVersion(1, 1, 0));
1524 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
1525 Context& context, const VkShaderStageFlags stage)
1527 VkPhysicalDeviceSubgroupProperties subgroupProperties;
1528 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1529 subgroupProperties.pNext = DE_NULL;
1531 VkPhysicalDeviceProperties2 properties;
1532 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1533 properties.pNext = &subgroupProperties;
1535 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1537 return (stage & subgroupProperties.supportedStages) ? true : false;
1540 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
1541 VkShaderStageFlags stage)
1547 case VK_SHADER_STAGE_COMPUTE_BIT:
1552 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
1554 VkSubgroupFeatureFlagBits bit) {
1555 VkPhysicalDeviceSubgroupProperties subgroupProperties;
1556 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1557 subgroupProperties.pNext = DE_NULL;
1559 VkPhysicalDeviceProperties2 properties;
1560 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1561 properties.pNext = &subgroupProperties;
1563 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1565 return (bit & subgroupProperties.supportedOperations) ? true : false;
1568 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
1570 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1571 context.getInstanceInterface(), context.getPhysicalDevice());
1572 return features.fragmentStoresAndAtomics ? true : false;
1575 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
1577 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1578 context.getInstanceInterface(), context.getPhysicalDevice());
1579 return features.vertexPipelineStoresAndAtomics ? true : false;
1582 bool vkt::subgroups::isInt64SupportedForDevice(Context& context)
1584 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1585 context.getInstanceInterface(), context.getPhysicalDevice());
1586 return features.shaderInt64 ? true : false;
1589 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1591 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1592 context.getInstanceInterface(), context.getPhysicalDevice());
1593 return features.shaderTessellationAndGeometryPointSize ? true : false;
1596 bool vkt::subgroups::isFormatSupportedForDevice(Context& context, vk::VkFormat format)
1598 VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures subgroupExtendedTypesFeatures;
1599 deMemset(&subgroupExtendedTypesFeatures, 0, sizeof(subgroupExtendedTypesFeatures));
1600 subgroupExtendedTypesFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES;
1601 subgroupExtendedTypesFeatures.pNext = DE_NULL;
1603 VkPhysicalDeviceShaderFloat16Int8Features float16Int8Features;
1604 deMemset(&float16Int8Features, 0, sizeof(float16Int8Features));
1605 float16Int8Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
1606 float16Int8Features.pNext = DE_NULL;
1608 VkPhysicalDeviceFeatures2 features2;
1609 deMemset(&features2, 0, sizeof(features2));
1610 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1611 features2.pNext = DE_NULL;
1613 VkPhysicalDevice16BitStorageFeatures storage16bit;
1614 deMemset(&storage16bit, 0, sizeof(storage16bit));
1615 storage16bit.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
1616 storage16bit.pNext = DE_NULL;
1617 bool is16bitStorageSupported = context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage");
1619 if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1620 context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1622 features2.pNext = &subgroupExtendedTypesFeatures;
1623 subgroupExtendedTypesFeatures.pNext = &float16Int8Features;
1624 if ( is16bitStorageSupported )
1626 float16Int8Features.pNext = &storage16bit;
1631 const PlatformInterface& platformInterface = context.getPlatformInterface();
1632 const VkInstance instance = context.getInstance();
1633 const InstanceDriver instanceDriver (platformInterface, instance);
1635 instanceDriver.getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
1641 case VK_FORMAT_R16_SFLOAT:
1642 case VK_FORMAT_R16G16_SFLOAT:
1643 case VK_FORMAT_R16G16B16_SFLOAT:
1644 case VK_FORMAT_R16G16B16A16_SFLOAT:
1645 return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderFloat16 & storage16bit.storageBuffer16BitAccess ? true : false;
1646 case VK_FORMAT_R64_SFLOAT:
1647 case VK_FORMAT_R64G64_SFLOAT:
1648 case VK_FORMAT_R64G64B64_SFLOAT:
1649 case VK_FORMAT_R64G64B64A64_SFLOAT:
1650 return features2.features.shaderFloat64 ? true : false;
1651 case VK_FORMAT_R8_SINT:
1652 case VK_FORMAT_R8G8_SINT:
1653 case VK_FORMAT_R8G8B8_SINT:
1654 case VK_FORMAT_R8G8B8A8_SINT:
1655 case VK_FORMAT_R8_UINT:
1656 case VK_FORMAT_R8G8_UINT:
1657 case VK_FORMAT_R8G8B8_UINT:
1658 case VK_FORMAT_R8G8B8A8_UINT:
1659 return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderInt8 ? true : false;
1660 case VK_FORMAT_R16_SINT:
1661 case VK_FORMAT_R16G16_SINT:
1662 case VK_FORMAT_R16G16B16_SINT:
1663 case VK_FORMAT_R16G16B16A16_SINT:
1664 case VK_FORMAT_R16_UINT:
1665 case VK_FORMAT_R16G16_UINT:
1666 case VK_FORMAT_R16G16B16_UINT:
1667 case VK_FORMAT_R16G16B16A16_UINT:
1668 return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt16 & storage16bit.storageBuffer16BitAccess ? true : false;
1669 case VK_FORMAT_R64_SINT:
1670 case VK_FORMAT_R64G64_SINT:
1671 case VK_FORMAT_R64G64B64_SINT:
1672 case VK_FORMAT_R64G64B64A64_SINT:
1673 case VK_FORMAT_R64_UINT:
1674 case VK_FORMAT_R64G64_UINT:
1675 case VK_FORMAT_R64G64B64_UINT:
1676 case VK_FORMAT_R64G64B64A64_UINT:
1677 return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt64 ? true : false;
1681 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1683 return context.contextSupports(vk::ApiVersion(1, 2, 0)) &&
1684 vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice()).subgroupBroadcastDynamicId;
// Maps a VkFormat to a name string; elsewhere in this file the result is used
// as the element type of `data[]` declarations in generated GLSL.
// Case labels cover 1- to 4-component SINT/UINT formats of 8, 16, 32 and 64
// bits, SFLOAT formats of 16, 32 and 64 bits, and the 8-bit USCALED formats.
// Any other format reaches DE_FATAL("Unhandled format!").
// NOTE(review): the returned strings are not visible in this excerpt;
// presumably the USCALED formats stand in for boolean types (see
// isFormatBool) - confirm against the full file.
1687 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1692 DE_FATAL("Unhandled format!");
1694 case VK_FORMAT_R8_SINT:
1696 case VK_FORMAT_R8G8_SINT:
1698 case VK_FORMAT_R8G8B8_SINT:
1700 case VK_FORMAT_R8G8B8A8_SINT:
1702 case VK_FORMAT_R8_UINT:
1704 case VK_FORMAT_R8G8_UINT:
1706 case VK_FORMAT_R8G8B8_UINT:
1708 case VK_FORMAT_R8G8B8A8_UINT:
1710 case VK_FORMAT_R16_SINT:
1712 case VK_FORMAT_R16G16_SINT:
1714 case VK_FORMAT_R16G16B16_SINT:
1716 case VK_FORMAT_R16G16B16A16_SINT:
1718 case VK_FORMAT_R16_UINT:
1720 case VK_FORMAT_R16G16_UINT:
1722 case VK_FORMAT_R16G16B16_UINT:
1724 case VK_FORMAT_R16G16B16A16_UINT:
1726 case VK_FORMAT_R32_SINT:
1728 case VK_FORMAT_R32G32_SINT:
1730 case VK_FORMAT_R32G32B32_SINT:
1732 case VK_FORMAT_R32G32B32A32_SINT:
1734 case VK_FORMAT_R32_UINT:
1736 case VK_FORMAT_R32G32_UINT:
1738 case VK_FORMAT_R32G32B32_UINT:
1740 case VK_FORMAT_R32G32B32A32_UINT:
1742 case VK_FORMAT_R64_SINT:
1744 case VK_FORMAT_R64G64_SINT:
1746 case VK_FORMAT_R64G64B64_SINT:
1748 case VK_FORMAT_R64G64B64A64_SINT:
1750 case VK_FORMAT_R64_UINT:
1752 case VK_FORMAT_R64G64_UINT:
1754 case VK_FORMAT_R64G64B64_UINT:
1756 case VK_FORMAT_R64G64B64A64_UINT:
1758 case VK_FORMAT_R16_SFLOAT:
1760 case VK_FORMAT_R16G16_SFLOAT:
1762 case VK_FORMAT_R16G16B16_SFLOAT:
1764 case VK_FORMAT_R16G16B16A16_SFLOAT:
1766 case VK_FORMAT_R32_SFLOAT:
1768 case VK_FORMAT_R32G32_SFLOAT:
1770 case VK_FORMAT_R32G32B32_SFLOAT:
1772 case VK_FORMAT_R32G32B32A32_SFLOAT:
1774 case VK_FORMAT_R64_SFLOAT:
1776 case VK_FORMAT_R64G64_SFLOAT:
1778 case VK_FORMAT_R64G64B64_SFLOAT:
1780 case VK_FORMAT_R64G64B64A64_SFLOAT:
1782 case VK_FORMAT_R8_USCALED:
1784 case VK_FORMAT_R8G8_USCALED:
1786 case VK_FORMAT_R8G8B8_USCALED:
1788 case VK_FORMAT_R8G8B8A8_USCALED:
1793 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1799 case VK_FORMAT_R8_SINT:
1800 case VK_FORMAT_R8G8_SINT:
1801 case VK_FORMAT_R8G8B8_SINT:
1802 case VK_FORMAT_R8G8B8A8_SINT:
1803 case VK_FORMAT_R8_UINT:
1804 case VK_FORMAT_R8G8_UINT:
1805 case VK_FORMAT_R8G8B8_UINT:
1806 case VK_FORMAT_R8G8B8A8_UINT:
1807 return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1808 case VK_FORMAT_R16_SINT:
1809 case VK_FORMAT_R16G16_SINT:
1810 case VK_FORMAT_R16G16B16_SINT:
1811 case VK_FORMAT_R16G16B16A16_SINT:
1812 case VK_FORMAT_R16_UINT:
1813 case VK_FORMAT_R16G16_UINT:
1814 case VK_FORMAT_R16G16B16_UINT:
1815 case VK_FORMAT_R16G16B16A16_UINT:
1816 return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1817 case VK_FORMAT_R64_SINT:
1818 case VK_FORMAT_R64G64_SINT:
1819 case VK_FORMAT_R64G64B64_SINT:
1820 case VK_FORMAT_R64G64B64A64_SINT:
1821 case VK_FORMAT_R64_UINT:
1822 case VK_FORMAT_R64G64_UINT:
1823 case VK_FORMAT_R64G64B64_UINT:
1824 case VK_FORMAT_R64G64B64A64_UINT:
1825 return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1826 case VK_FORMAT_R16_SFLOAT:
1827 case VK_FORMAT_R16G16_SFLOAT:
1828 case VK_FORMAT_R16G16B16_SFLOAT:
1829 case VK_FORMAT_R16G16B16A16_SFLOAT:
1830 return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1834 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats()
1836 std::vector<VkFormat> formats;
1838 formats.push_back(VK_FORMAT_R8_SINT);
1839 formats.push_back(VK_FORMAT_R8G8_SINT);
1840 formats.push_back(VK_FORMAT_R8G8B8_SINT);
1841 formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1842 formats.push_back(VK_FORMAT_R8_UINT);
1843 formats.push_back(VK_FORMAT_R8G8_UINT);
1844 formats.push_back(VK_FORMAT_R8G8B8_UINT);
1845 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1846 formats.push_back(VK_FORMAT_R16_SINT);
1847 formats.push_back(VK_FORMAT_R16G16_SINT);
1848 formats.push_back(VK_FORMAT_R16G16B16_SINT);
1849 formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1850 formats.push_back(VK_FORMAT_R16_UINT);
1851 formats.push_back(VK_FORMAT_R16G16_UINT);
1852 formats.push_back(VK_FORMAT_R16G16B16_UINT);
1853 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1854 formats.push_back(VK_FORMAT_R32_SINT);
1855 formats.push_back(VK_FORMAT_R32G32_SINT);
1856 formats.push_back(VK_FORMAT_R32G32B32_SINT);
1857 formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
1858 formats.push_back(VK_FORMAT_R32_UINT);
1859 formats.push_back(VK_FORMAT_R32G32_UINT);
1860 formats.push_back(VK_FORMAT_R32G32B32_UINT);
1861 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
1862 formats.push_back(VK_FORMAT_R64_SINT);
1863 formats.push_back(VK_FORMAT_R64G64_SINT);
1864 formats.push_back(VK_FORMAT_R64G64B64_SINT);
1865 formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
1866 formats.push_back(VK_FORMAT_R64_UINT);
1867 formats.push_back(VK_FORMAT_R64G64_UINT);
1868 formats.push_back(VK_FORMAT_R64G64B64_UINT);
1869 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
1870 formats.push_back(VK_FORMAT_R16_SFLOAT);
1871 formats.push_back(VK_FORMAT_R16G16_SFLOAT);
1872 formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
1873 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
1874 formats.push_back(VK_FORMAT_R32_SFLOAT);
1875 formats.push_back(VK_FORMAT_R32G32_SFLOAT);
1876 formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
1877 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
1878 formats.push_back(VK_FORMAT_R64_SFLOAT);
1879 formats.push_back(VK_FORMAT_R64G64_SFLOAT);
1880 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
1881 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
1882 formats.push_back(VK_FORMAT_R8_USCALED);
1883 formats.push_back(VK_FORMAT_R8G8_USCALED);
1884 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
1885 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
1890 bool vkt::subgroups::isFormatSigned (VkFormat format)
1896 case VK_FORMAT_R8_SINT:
1897 case VK_FORMAT_R8G8_SINT:
1898 case VK_FORMAT_R8G8B8_SINT:
1899 case VK_FORMAT_R8G8B8A8_SINT:
1900 case VK_FORMAT_R16_SINT:
1901 case VK_FORMAT_R16G16_SINT:
1902 case VK_FORMAT_R16G16B16_SINT:
1903 case VK_FORMAT_R16G16B16A16_SINT:
1904 case VK_FORMAT_R32_SINT:
1905 case VK_FORMAT_R32G32_SINT:
1906 case VK_FORMAT_R32G32B32_SINT:
1907 case VK_FORMAT_R32G32B32A32_SINT:
1908 case VK_FORMAT_R64_SINT:
1909 case VK_FORMAT_R64G64_SINT:
1910 case VK_FORMAT_R64G64B64_SINT:
1911 case VK_FORMAT_R64G64B64A64_SINT:
1916 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
1922 case VK_FORMAT_R8_UINT:
1923 case VK_FORMAT_R8G8_UINT:
1924 case VK_FORMAT_R8G8B8_UINT:
1925 case VK_FORMAT_R8G8B8A8_UINT:
1926 case VK_FORMAT_R16_UINT:
1927 case VK_FORMAT_R16G16_UINT:
1928 case VK_FORMAT_R16G16B16_UINT:
1929 case VK_FORMAT_R16G16B16A16_UINT:
1930 case VK_FORMAT_R32_UINT:
1931 case VK_FORMAT_R32G32_UINT:
1932 case VK_FORMAT_R32G32B32_UINT:
1933 case VK_FORMAT_R32G32B32A32_UINT:
1934 case VK_FORMAT_R64_UINT:
1935 case VK_FORMAT_R64G64_UINT:
1936 case VK_FORMAT_R64G64B64_UINT:
1937 case VK_FORMAT_R64G64B64A64_UINT:
1942 bool vkt::subgroups::isFormatFloat (VkFormat format)
1948 case VK_FORMAT_R16_SFLOAT:
1949 case VK_FORMAT_R16G16_SFLOAT:
1950 case VK_FORMAT_R16G16B16_SFLOAT:
1951 case VK_FORMAT_R16G16B16A16_SFLOAT:
1952 case VK_FORMAT_R32_SFLOAT:
1953 case VK_FORMAT_R32G32_SFLOAT:
1954 case VK_FORMAT_R32G32B32_SFLOAT:
1955 case VK_FORMAT_R32G32B32A32_SFLOAT:
1956 case VK_FORMAT_R64_SFLOAT:
1957 case VK_FORMAT_R64G64_SFLOAT:
1958 case VK_FORMAT_R64G64B64_SFLOAT:
1959 case VK_FORMAT_R64G64B64A64_SFLOAT:
// Predicate over VkFormat for the formats this file treats as boolean data:
// the 1- to 4-component R8*_USCALED formats listed below.
// NOTE(review): the switch header, default branch and return statements are not visible in this
// excerpt; presumably the listed cases yield true and every other format false -- confirm.
1964 bool vkt::subgroups::isFormatBool (VkFormat format)
1970 case VK_FORMAT_R8_USCALED:
1971 case VK_FORMAT_R8G8_USCALED:
1972 case VK_FORMAT_R8G8B8_USCALED:
1973 case VK_FORMAT_R8G8B8A8_USCALED:
// Registers the vertex shader used by the framebuffer tests under the name "vert".
// The quoted GLSL lines show the shader logic: copy in_position to gl_Position and set
// gl_PointSize to 1.0f. The SPIR-V assembly strings that follow are what is streamed into
// programCollection.spirvAsmSources.
// NOTE(review): braces and several shader lines are not visible in this excerpt, and it is not
// visible whether the GLSL text is compiled or only kept for reference -- confirm against the file.
1978 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
1981 "layout(location = 0) in highp vec4 in_position;\n"
1982 "void main (void)\n"
1984 " gl_Position = in_position;\n"
1985 " gl_PointSize = 1.0f;\n"
1988 programCollection.spirvAsmSources.add("vert") <<
1991 "; Generator: Khronos Glslang Reference Front End; 7\n"
1994 "OpCapability Shader\n"
1995 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1996 "OpMemoryModel Logical GLSL450\n"
1997 "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
1998 "OpMemberDecorate %11 0 BuiltIn Position\n"
1999 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
2000 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2001 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2002 "OpDecorate %11 Block\n"
2003 "OpDecorate %17 Location 0\n"
2005 "%3 = OpTypeFunction %2\n"
2006 "%6 = OpTypeFloat 32\n"
2007 "%7 = OpTypeVector %6 4\n"
2008 "%8 = OpTypeInt 32 0\n"
2009 "%9 = OpConstant %8 1\n"
2010 "%10 = OpTypeArray %6 %9\n"
2011 "%11 = OpTypeStruct %7 %6 %10 %10\n"
2012 "%12 = OpTypePointer Output %11\n"
2013 "%13 = OpVariable %12 Output\n"
2014 "%14 = OpTypeInt 32 1\n"
2015 "%15 = OpConstant %14 0\n"
2016 "%16 = OpTypePointer Input %7\n"
2017 "%17 = OpVariable %16 Input\n"
2018 "%19 = OpTypePointer Output %7\n"
2019 "%21 = OpConstant %14 1\n"
2020 "%22 = OpConstant %6 1\n"
2021 "%23 = OpTypePointer Output %6\n"
2022 "%4 = OpFunction %2 None %3\n"
2024 "%18 = OpLoad %7 %17\n"
2025 "%20 = OpAccessChain %19 %13 %15\n"
2027 "%24 = OpAccessChain %23 %13 %21\n"
// Registers the fragment shader used by the framebuffer tests under the name "fragment".
// The quoted GLSL lines show the shader logic: read the float input in_color (location 0) and
// write uint(in_color) to the uint output out_color (location 0). The SPIR-V assembly strings
// that follow (including the OpConvertFToU conversion) are streamed into
// programCollection.spirvAsmSources.
// NOTE(review): braces and several shader lines are not visible in this excerpt, and it is not
// visible whether the GLSL text is compiled or only kept for reference -- confirm against the file.
2033 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
2036 "layout(location = 0) in float in_color;\n"
2037 "layout(location = 0) out uint out_color;\n"
2040 " out_color = uint(in_color);\n"
2043 programCollection.spirvAsmSources.add("fragment") <<
2046 "; Generator: Khronos Glslang Reference Front End; 2\n"
2049 "OpCapability Shader\n"
2050 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2051 "OpMemoryModel Logical GLSL450\n"
2052 "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
2053 "OpExecutionMode %4 OriginUpperLeft\n"
2054 "OpDecorate %8 Location 0\n"
2055 "OpDecorate %11 Location 0\n"
2057 "%3 = OpTypeFunction %2\n"
2058 "%6 = OpTypeInt 32 0\n"
2059 "%7 = OpTypePointer Output %6\n"
2060 "%8 = OpVariable %7 Output\n"
2061 "%9 = OpTypeFloat 32\n"
2062 "%10 = OpTypePointer Input %9\n"
2063 "%11 = OpVariable %10 Input\n"
2064 "%4 = OpFunction %2 None %3\n"
2066 "%12 = OpLoad %9 %11\n"
2067 "%13 = OpConvertFToU %6 %12\n"
// Registers the tessellation control shader used by the framebuffer tests under the name "tesc".
// The quoted GLSL lines show the shader logic: two output vertices per patch; invocation 0 sets
// gl_TessLevelOuter[0] and gl_TessLevelOuter[1] to 1.0f; every invocation forwards its input
// gl_Position to the matching output. The SPIR-V assembly strings that follow are streamed into
// programCollection.spirvAsmSources.
// NOTE(review): braces and several shader lines are not visible in this excerpt, and it is not
// visible whether the GLSL text is compiled or only kept for reference -- confirm against the file.
2073 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
2076 "#extension GL_KHR_shader_subgroup_basic: enable\n"
2077 "#extension GL_EXT_tessellation_shader : require\n"
2078 "layout(vertices = 2) out;\n"
2079 "void main (void)\n"
2081 " if (gl_InvocationID == 0)\n"
2083 " gl_TessLevelOuter[0] = 1.0f;\n"
2084 " gl_TessLevelOuter[1] = 1.0f;\n"
2086 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
2089 programCollection.spirvAsmSources.add("tesc") <<
2092 "; Generator: Khronos Glslang Reference Front End; 2\n"
2095 "OpCapability Tessellation\n"
2096 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2097 "OpMemoryModel Logical GLSL450\n"
2098 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
2099 "OpExecutionMode %4 OutputVertices 2\n"
2100 "OpDecorate %8 BuiltIn InvocationId\n"
2101 "OpDecorate %20 Patch\n"
2102 "OpDecorate %20 BuiltIn TessLevelOuter\n"
2103 "OpMemberDecorate %29 0 BuiltIn Position\n"
2104 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
2105 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
2106 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
2107 "OpDecorate %29 Block\n"
2108 "OpMemberDecorate %35 0 BuiltIn Position\n"
2109 "OpMemberDecorate %35 1 BuiltIn PointSize\n"
2110 "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
2111 "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
2112 "OpDecorate %35 Block\n"
2114 "%3 = OpTypeFunction %2\n"
2115 "%6 = OpTypeInt 32 1\n"
2116 "%7 = OpTypePointer Input %6\n"
2117 "%8 = OpVariable %7 Input\n"
2118 "%10 = OpConstant %6 0\n"
2119 "%11 = OpTypeBool\n"
2120 "%15 = OpTypeFloat 32\n"
2121 "%16 = OpTypeInt 32 0\n"
2122 "%17 = OpConstant %16 4\n"
2123 "%18 = OpTypeArray %15 %17\n"
2124 "%19 = OpTypePointer Output %18\n"
2125 "%20 = OpVariable %19 Output\n"
2126 "%21 = OpConstant %15 1\n"
2127 "%22 = OpTypePointer Output %15\n"
2128 "%24 = OpConstant %6 1\n"
2129 "%26 = OpTypeVector %15 4\n"
2130 "%27 = OpConstant %16 1\n"
2131 "%28 = OpTypeArray %15 %27\n"
2132 "%29 = OpTypeStruct %26 %15 %28 %28\n"
2133 "%30 = OpConstant %16 2\n"
2134 "%31 = OpTypeArray %29 %30\n"
2135 "%32 = OpTypePointer Output %31\n"
2136 "%33 = OpVariable %32 Output\n"
2137 "%35 = OpTypeStruct %26 %15 %28 %28\n"
2138 "%36 = OpConstant %16 32\n"
2139 "%37 = OpTypeArray %35 %36\n"
2140 "%38 = OpTypePointer Input %37\n"
2141 "%39 = OpVariable %38 Input\n"
2142 "%41 = OpTypePointer Input %26\n"
2143 "%44 = OpTypePointer Output %26\n"
2144 "%4 = OpFunction %2 None %3\n"
2146 "%9 = OpLoad %6 %8\n"
2147 "%12 = OpIEqual %11 %9 %10\n"
2148 "OpSelectionMerge %14 None\n"
2149 "OpBranchConditional %12 %13 %14\n"
2151 "%23 = OpAccessChain %22 %20 %10\n"
2153 "%25 = OpAccessChain %22 %20 %24\n"
2157 "%34 = OpLoad %6 %8\n"
2158 "%40 = OpLoad %6 %8\n"
2159 "%42 = OpAccessChain %41 %39 %40 %10\n"
2160 "%43 = OpLoad %26 %42\n"
2161 "%45 = OpAccessChain %44 %33 %34 %10\n"
// Registers the tessellation evaluation shader used by the framebuffer tests under the name "tese".
// The quoted GLSL lines show the shader logic: isolines / equal_spacing / ccw input layout;
// gl_Position is mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x) and out_color
// is in_color[0]. The SPIR-V assembly strings that follow are streamed into
// programCollection.spirvAsmSources.
// NOTE(review): braces and several shader lines are not visible in this excerpt, and it is not
// visible whether the GLSL text is compiled or only kept for reference -- confirm against the file.
2167 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
2170 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
2171 "#extension GL_EXT_tessellation_shader : require\n"
2172 "layout(isolines, equal_spacing, ccw ) in;\n"
2173 "layout(location = 0) in float in_color[];\n"
2174 "layout(location = 0) out float out_color;\n"
2176 "void main (void)\n"
2178 " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
2179 " out_color = in_color[0];\n"
2182 programCollection.spirvAsmSources.add("tese") <<
2185 "; Generator: Khronos Glslang Reference Front End; 2\n"
2188 "OpCapability Tessellation\n"
2189 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2190 "OpMemoryModel Logical GLSL450\n"
2191 "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
2192 "OpExecutionMode %4 Isolines\n"
2193 "OpExecutionMode %4 SpacingEqual\n"
2194 "OpExecutionMode %4 VertexOrderCcw\n"
2195 "OpMemberDecorate %11 0 BuiltIn Position\n"
2196 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
2197 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2198 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2199 "OpDecorate %11 Block\n"
2200 "OpMemberDecorate %16 0 BuiltIn Position\n"
2201 "OpMemberDecorate %16 1 BuiltIn PointSize\n"
2202 "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
2203 "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
2204 "OpDecorate %16 Block\n"
2205 "OpDecorate %29 BuiltIn TessCoord\n"
2206 "OpDecorate %39 Location 0\n"
2207 "OpDecorate %42 Location 0\n"
2209 "%3 = OpTypeFunction %2\n"
2210 "%6 = OpTypeFloat 32\n"
2211 "%7 = OpTypeVector %6 4\n"
2212 "%8 = OpTypeInt 32 0\n"
2213 "%9 = OpConstant %8 1\n"
2214 "%10 = OpTypeArray %6 %9\n"
2215 "%11 = OpTypeStruct %7 %6 %10 %10\n"
2216 "%12 = OpTypePointer Output %11\n"
2217 "%13 = OpVariable %12 Output\n"
2218 "%14 = OpTypeInt 32 1\n"
2219 "%15 = OpConstant %14 0\n"
2220 "%16 = OpTypeStruct %7 %6 %10 %10\n"
2221 "%17 = OpConstant %8 32\n"
2222 "%18 = OpTypeArray %16 %17\n"
2223 "%19 = OpTypePointer Input %18\n"
2224 "%20 = OpVariable %19 Input\n"
2225 "%21 = OpTypePointer Input %7\n"
2226 "%24 = OpConstant %14 1\n"
2227 "%27 = OpTypeVector %6 3\n"
2228 "%28 = OpTypePointer Input %27\n"
2229 "%29 = OpVariable %28 Input\n"
2230 "%30 = OpConstant %8 0\n"
2231 "%31 = OpTypePointer Input %6\n"
2232 "%36 = OpTypePointer Output %7\n"
2233 "%38 = OpTypePointer Output %6\n"
2234 "%39 = OpVariable %38 Output\n"
2235 "%40 = OpTypeArray %6 %17\n"
2236 "%41 = OpTypePointer Input %40\n"
2237 "%42 = OpVariable %41 Input\n"
2238 "%4 = OpFunction %2 None %3\n"
2240 "%22 = OpAccessChain %21 %20 %15 %15\n"
2241 "%23 = OpLoad %7 %22\n"
2242 "%25 = OpAccessChain %21 %20 %24 %15\n"
2243 "%26 = OpLoad %7 %25\n"
2244 "%32 = OpAccessChain %31 %29 %30\n"
2245 "%33 = OpLoad %6 %32\n"
2246 "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
2247 "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
2248 "%37 = OpAccessChain %36 %13 %15\n"
2250 "%43 = OpAccessChain %31 %42 %15\n"
2251 "%44 = OpLoad %6 %43\n"
// GLSL overload: instantiates one geometry-shader template for two input topologies.
//   glslTemplate - GLSL source containing a TOPOLOGY placeholder.
//   options      - build options streamed onto each added program.
//   collection   - receives the two programs, named "geometry_lines" and "geometry_points".
// TOPOLOGY is substituted with "lines" and "points" respectively.
2257 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection)
2259 tcu::StringTemplate geometryTemplate(glslTemplate);
2261 map<string, string> linesParams;
2262 linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2264 map<string, string> pointsParams;
2265 pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2267 collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
2268 collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
// SPIR-V assembly overload: instantiates one geometry-shader template for two input topologies.
//   spirvTemplate - SPIR-V assembly text containing a TOPOLOGY placeholder.
//   options       - build options streamed onto each added program.
//   collection    - receives the two programs, named "geometry_lines" and "geometry_points".
// TOPOLOGY is substituted with "InputLines" and "InputPoints" respectively.
2271 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
2273 tcu::StringTemplate geometryTemplate(spirvTemplate);
2275 map<string, string> linesParams;
2276 linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2278 map<string, string> pointsParams;
2279 pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2281 collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
2282 collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
// Fills the host-visible memory behind `alloc` according to data.initializeType.
//   context - supplies the random seed (command-line base seed) and the device used for the flush.
//   alloc   - allocation whose host pointer is written.
//   data    - describes format, element count, layout, image/buffer kind and initialisation mode.
// InitializeNonZero: writes random values whose element type matches the format family.
// InitializeZero:    walks the memory as 32-bit words (the store itself is not visible in this
//                    excerpt; presumably writes 0 -- confirm).
// Any mode other than InitializeNone ends with flushAlloc on the allocation.
// NOTE(review): braces, the switch header, `default:` and `break` lines are missing from this excerpt.
2285 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
2287 const vk::VkFormat format = data.format;
// Byte size: images use the plain format size, buffers the layout-dependent element size.
2288 const vk::VkDeviceSize size = data.numElements *
2289 (data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2290 if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
// Seeded from the command-line base seed.
2292 de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2297 DE_FATAL("Illegal buffer format");
// 8-bit integer formats: one random byte per element.
2299 case VK_FORMAT_R8_SINT:
2300 case VK_FORMAT_R8G8_SINT:
2301 case VK_FORMAT_R8G8B8_SINT:
2302 case VK_FORMAT_R8G8B8A8_SINT:
2303 case VK_FORMAT_R8_UINT:
2304 case VK_FORMAT_R8G8_UINT:
2305 case VK_FORMAT_R8G8B8_UINT:
2306 case VK_FORMAT_R8G8B8A8_UINT:
2308 deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
2310 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
2312 ptr[k] = rnd.getUint8();
// 16-bit integer formats.
2316 case VK_FORMAT_R16_SINT:
2317 case VK_FORMAT_R16G16_SINT:
2318 case VK_FORMAT_R16G16B16_SINT:
2319 case VK_FORMAT_R16G16B16A16_SINT:
2320 case VK_FORMAT_R16_UINT:
2321 case VK_FORMAT_R16G16_UINT:
2322 case VK_FORMAT_R16G16B16_UINT:
2323 case VK_FORMAT_R16G16B16A16_UINT:
2325 deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
2327 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
2329 ptr[k] = rnd.getUint16();
// USCALED formats (the ones isFormatBool lists) are written as 32-bit words:
// each word is either 0 or a random value whose lowest bit is set.
2333 case VK_FORMAT_R8_USCALED:
2334 case VK_FORMAT_R8G8_USCALED:
2335 case VK_FORMAT_R8G8B8_USCALED:
2336 case VK_FORMAT_R8G8B8A8_USCALED:
2338 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2340 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2342 deUint32 r = rnd.getUint32();
2343 ptr[k] = (r & 1) ? r : 0;
// 32-bit integer formats.
2347 case VK_FORMAT_R32_SINT:
2348 case VK_FORMAT_R32G32_SINT:
2349 case VK_FORMAT_R32G32B32_SINT:
2350 case VK_FORMAT_R32G32B32A32_SINT:
2351 case VK_FORMAT_R32_UINT:
2352 case VK_FORMAT_R32G32_UINT:
2353 case VK_FORMAT_R32G32B32_UINT:
2354 case VK_FORMAT_R32G32B32A32_UINT:
2356 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2358 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2360 ptr[k] = rnd.getUint32();
// 64-bit integer formats.
2364 case VK_FORMAT_R64_SINT:
2365 case VK_FORMAT_R64G64_SINT:
2366 case VK_FORMAT_R64G64B64_SINT:
2367 case VK_FORMAT_R64G64B64A64_SINT:
2368 case VK_FORMAT_R64_UINT:
2369 case VK_FORMAT_R64G64_UINT:
2370 case VK_FORMAT_R64G64B64_UINT:
2371 case VK_FORMAT_R64G64B64A64_UINT:
2373 deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
2375 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
2377 ptr[k] = rnd.getUint64();
// 16-bit float formats: random 32-bit floats converted with deFloat32To16.
2381 case VK_FORMAT_R16_SFLOAT:
2382 case VK_FORMAT_R16G16_SFLOAT:
2383 case VK_FORMAT_R16G16B16_SFLOAT:
2384 case VK_FORMAT_R16G16B16A16_SFLOAT:
2386 deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
2388 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
2390 ptr[k] = deFloat32To16(rnd.getFloat());
// 32-bit float formats.
2394 case VK_FORMAT_R32_SFLOAT:
2395 case VK_FORMAT_R32G32_SFLOAT:
2396 case VK_FORMAT_R32G32B32_SFLOAT:
2397 case VK_FORMAT_R32G32B32A32_SFLOAT:
2399 float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
2401 for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2403 ptr[k] = rnd.getFloat();
// 64-bit float formats.
2407 case VK_FORMAT_R64_SFLOAT:
2408 case VK_FORMAT_R64G64_SFLOAT:
2409 case VK_FORMAT_R64G64B64_SFLOAT:
2410 case VK_FORMAT_R64G64B64A64_SFLOAT:
2412 double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
2414 for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2416 ptr[k] = rnd.getDouble();
2422 else if (subgroups::SSBOData::InitializeZero == data.initializeType)
// Zero path iterates in 32-bit words (size / 4 of them).
2424 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2426 for (vk::VkDeviceSize k = 0; k < size / 4; k++)
// Flush host writes whenever something was written.
2432 if (subgroups::SSBOData::InitializeNone != data.initializeType)
2434 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
// Maps a single shader stage bit to a deUint32 "result binding".
// Handles the vertex, tessellation control, tessellation evaluation and geometry stages.
// NOTE(review): the value returned for each stage and the default branch are not visible in
// this excerpt -- consult the full file before relying on specific binding numbers.
2438 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2442 case VK_SHADER_STAGE_VERTEX_BIT:
2445 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2448 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2451 case VK_SHADER_STAGE_GEOMETRY_BIT:
// Convenience wrapper: runs the tessellation framebuffer test without extra shader-stage create
// flags and without a required subgroup size (both forwarded as 0u).
// All other arguments are passed through unchanged to
// makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize, whose status is returned.
2462 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
2463 Context& context, VkFormat format, SSBOData* extraData,
2464 deUint32 extraDataCount, const void* internalData,
2465 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2466 const VkShaderStageFlags shaderStage)
2468 return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
// Framebuffer variant of the tessellation subgroup test, with optional required subgroup size.
//   context                    - test context (device, queue, shader binaries, log).
//   format                     - format of the 1-pixel-high colour attachment that is read back.
//   extraData / extraDataCount - additional inputs; each becomes an image or a buffer created
//                                with VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, visible to `shaderStage`.
//   internalData               - opaque pointer forwarded to checkResult.
//   checkResult                - verifier called once per tested width with the read-back pixels.
//   shaderStage                - selects which tessellation stage(s) receive the descriptors,
//                                create flags and required subgroup size.
//   tessShaderStageCreateFlags, requiredSubgroupSize - applied only to the tessellation stages
//                                present in shaderStage; a requiredSubgroupSize of 0 passes
//                                DE_NULL instead of the size array.
// Builds a vert/tesc/tese/fragment patch-list pipeline, then for each width (1 up to but not
// including getMaxWidth(), stepped by getNextWidth) draws 2*width vertices, copies the image
// into a buffer and runs checkResult. Returns pass("OK"), or fail("Failed!") after logging the
// pass count when failedIterations is non-zero.
// NOTE(review): braces and short statements (including the iteration counters' increments) are
// missing from this excerpt.
2471 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
2472 Context& context, VkFormat format, SSBOData* extraData,
2473 deUint32 extraDataCount, const void* internalData,
2474 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2475 const VkShaderStageFlags shaderStage, const deUint32 tessShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
2477 const DeviceInterface& vk = context.getDeviceInterface();
2478 const VkDevice device = context.getDevice();
2479 const deUint32 maxWidth = getMaxWidth();
2480 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
2481 DescriptorSetLayoutBuilder layoutBuilder;
2482 DescriptorPoolBuilder poolBuilder;
2483 DescriptorSetUpdateBuilder updateBuilder;
2484 Move <VkDescriptorPool> descriptorPool;
2485 Move <VkDescriptorSet> descriptorSet;
// Shader modules come from the binary collection entries "vert", "tesc", "tese" and "fragment".
2487 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device,
2488 context.getBinaryCollection().get("vert"), 0u));
2489 const Unique<VkShaderModule> teCtrlShaderModule (createShaderModule(vk, device,
2490 context.getBinaryCollection().get("tesc"), 0u));
2491 const Unique<VkShaderModule> teEvalShaderModule (createShaderModule(vk, device,
2492 context.getBinaryCollection().get("tese"), 0u));
2493 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device,
2494 context.getBinaryCollection().get("fragment"), 0u));
2495 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
// One vec4 position attribute per vertex.
2497 const VkVertexInputBindingDescription vertexInputBinding =
2500 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
2501 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
2504 const VkVertexInputAttributeDescription vertexInputAttribute =
2508 VK_FORMAT_R32G32B32A32_SFLOAT,
// Create and initialise one image or uniform buffer per extra input.
2512 for (deUint32 i = 0u; i < extraDataCount; i++)
2514 if (extraData[i].isImage)
2516 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2520 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2521 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2523 const Allocation& alloc = inputBuffers[i]->getAllocation();
2524 initializeMemory(context, alloc, extraData[i]);
// Bindings are added in input order, so binding index == index into inputBuffers.
2527 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2528 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
2530 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
2532 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
// Five-entry required-size array; entries 1 and 2 carry requiredSubgroupSize only when the
// tessellation control / evaluation stage is part of shaderStage.
2534 const deUint32 requiredSubgroupSizes[5] = {0u,
2535 ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
2536 ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
2540 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
2541 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
2542 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2543 *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
2544 *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2545 0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
2546 ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
2547 0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2549 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2550 poolBuilder.addType(inputBuffers[ndx]->getType());
// Descriptor pool and set are only created when there are extra inputs.
2552 if (extraDataCount > 0)
2554 descriptorPool = poolBuilder.build(vk, device,
2555 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2556 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2559 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2561 if (inputBuffers[buffersNdx]->isImage())
2563 VkDescriptorImageInfo info =
2564 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2565 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2567 updateBuilder.writeSingle(*descriptorSet,
2568 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2569 inputBuffers[buffersNdx]->getType(), &info);
2573 VkDescriptorBufferInfo info =
2574 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2575 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2577 updateBuilder.writeSingle(*descriptorSet,
2578 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2579 inputBuffers[buffersNdx]->getType(), &info);
2583 updateBuilder.update(vk, device);
2585 const VkQueue queue = context.getUniversalQueue();
2586 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2587 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
2588 const deUint32 subgroupSize = getSubgroupSize(context);
2589 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
// Two vertices per pixel column of the maxWidth-wide target.
2590 const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
2591 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2592 unsigned totalIterations = 0u;
2593 unsigned failedIterations = 0u;
2594 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
// Vertex data: each consecutive pair spans one pixel in x, from its left edge to its right edge
// (x runs from -1 upward in steps of 2/maxWidth).
2597 const Allocation& alloc = vertexBuffer.getAllocation();
2598 std::vector<tcu::Vec4> data (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
2599 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
2600 float leftHandPosition = -1.0f;
2602 for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
2604 data[ndx][0] = leftHandPosition;
2605 leftHandPosition += pixelSize;
2606 data[ndx+1][0] = leftHandPosition;
2609 deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
2610 flushAlloc(vk, device, alloc);
2613 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2614 const VkViewport viewport = makeViewport(maxWidth, 1u);
2615 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
2616 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2617 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2618 const VkDeviceSize vertexBufferOffset = 0u;
// One command-buffer submission per tested width; maxWidth itself is excluded by the `<`.
2620 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2624 beginCommandBuffer(vk, *cmdBuffer);
2627 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2628 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2630 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2632 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2634 if (extraDataCount > 0)
2636 vk.cmdBindDescriptorSets(*cmdBuffer,
2637 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2638 &descriptorSet.get(), 0u, DE_NULL);
2641 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2642 vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
2644 endRenderPass(vk, *cmdBuffer);
2646 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2647 endCommandBuffer(vk, *cmdBuffer);
2649 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
// Read back the copied pixels and hand them to the verifier.
2653 const Allocation& allocResult = imageBufferResult.getAllocation();
2654 invalidateAlloc(vk, device, allocResult);
2656 std::vector<const void*> datas;
2657 datas.push_back(allocResult.getHostPtr());
// NOTE(review): the verifier is given width/2u although 2*width vertices were drawn; the
// reason is not evident from this excerpt -- confirm.
2658 if (!checkResult(internalData, datas, width/2u, subgroupSize))
2663 if (0 < failedIterations)
// Clamp to 0 so the logged count can never underflow.
2665 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2667 context.getTestContext().getLog()
2668 << TestLog::Message << valuesPassed << " / "
2669 << totalIterations << " values passed" << TestLog::EndMessage;
2670 return tcu::TestStatus::fail("Failed!");
2673 return tcu::TestStatus::pass("OK");
// Verifies the first result buffer against a reference value.
//   datas - result pointers; only datas[0] is read, interpreted as an array of deUint32.
//   width - number of 32-bit elements to inspect.
//   ref   - reference value.
// NOTE(review): the loop body and return statements are not visible in this excerpt;
// presumably each element is compared with `ref` and a mismatch yields false -- confirm.
2676 bool vkt::subgroups::check(std::vector<const void*> datas,
2677 deUint32 width, deUint32 ref)
2679 const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2681 for (deUint32 n = 0; n < width; ++n)
// Compute-dispatch flavour of check(): verifies one value per invocation of the whole dispatch.
//   datas         - result pointers, forwarded to check().
//   numWorkgroups - workgroup counts in x, y, z.
//   localSize     - workgroup size in x, y, z.
// The element count passed to check() is the product of the three global sizes
// (numWorkgroups[i] * localSize[i]).
// NOTE(review): the line declaring the trailing `ref` parameter is missing from this excerpt.
2692 bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
2693 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
2696 const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2697 const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2698 const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2700 return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
// Convenience wrapper around makeGeometryFrameBufferTestRequiredSubgroupSize; forwards context,
// format, extra inputs, internalData and checkResult unchanged and returns its status.
// NOTE(review): the trailing arguments of the call are on a line missing from this excerpt;
// presumably 0u for both the stage create flags and the required subgroup size -- confirm.
2703 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
2704 Context& context, VkFormat format, SSBOData* extraData,
2705 deUint32 extraDataCount, const void* internalData,
2706 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2708 return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult,
// Framebuffer variant of the geometry-shader subgroup test, with optional required subgroup size.
//   context                    - test context (device, queue, shader binaries, log).
//   format                     - format of the 1-pixel-high colour attachment that is read back.
//   extraData / extraDataCount - additional inputs; each becomes an image or a buffer created
//                                with VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, visible to the
//                                geometry stage only.
//   internalData               - opaque pointer forwarded to checkResult.
//   checkResult                - verifier called once per tested width with the read-back pixels.
//   geometryShaderStageCreateFlags, requiredSubgroupSize - applied to the geometry stage; a
//                                requiredSubgroupSize of 0 passes DE_NULL instead of the size array.
// Builds a vert/geometry/fragment point-list pipeline, then for each width (1 up to but not
// including getMaxWidth(), stepped by getNextWidth) re-initialises the extra inputs, draws
// `width` points, copies the image into a buffer and runs checkResult. Returns pass("OK"), or
// fail("Failed!") after logging the pass count when failedIterations is non-zero.
// NOTE(review): braces and short statements (including the iteration counters' increments) are
// missing from this excerpt.
2712 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(
2713 Context& context, VkFormat format, SSBOData* extraData,
2714 deUint32 extraDataCount, const void* internalData,
2715 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2716 const deUint32 geometryShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
2718 const DeviceInterface& vk = context.getDeviceInterface();
2719 const VkDevice device = context.getDevice();
2720 const deUint32 maxWidth = getMaxWidth();
2721 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
2722 DescriptorSetLayoutBuilder layoutBuilder;
2723 DescriptorPoolBuilder poolBuilder;
2724 DescriptorSetUpdateBuilder updateBuilder;
2725 Move <VkDescriptorPool> descriptorPool;
2726 Move <VkDescriptorSet> descriptorSet;
// Shader modules come from the binary collection entries "vert", "geometry" and "fragment".
2728 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2729 const Unique<VkShaderModule> geometryShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2730 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2731 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
// One vec4 position attribute per vertex.
2732 const VkVertexInputBindingDescription vertexInputBinding =
2735 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
2736 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
2739 const VkVertexInputAttributeDescription vertexInputAttribute =
2743 VK_FORMAT_R32G32B32A32_SFLOAT,
// Create and initialise one image or uniform buffer per extra input.
2747 for (deUint32 i = 0u; i < extraDataCount; i++)
2749 if (extraData[i].isImage)
2751 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2755 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2756 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2758 const Allocation& alloc = inputBuffers[i]->getAllocation();
2759 initializeMemory(context, alloc, extraData[i]);
// Bindings are added in input order, so binding index == index into inputBuffers.
2762 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2763 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2765 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
2767 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
// Five-entry required-size array; only entry 3 carries requiredSubgroupSize.
2769 const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};
2771 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
2772 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
2773 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
2774 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2775 0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
2776 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2778 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2779 poolBuilder.addType(inputBuffers[ndx]->getType());
// Descriptor pool and set are only created when there are extra inputs.
2781 if (extraDataCount > 0)
2783 descriptorPool = poolBuilder.build(vk, device,
2784 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2785 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2788 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2790 if (inputBuffers[buffersNdx]->isImage())
2792 VkDescriptorImageInfo info =
2793 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2794 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2796 updateBuilder.writeSingle(*descriptorSet,
2797 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2798 inputBuffers[buffersNdx]->getType(), &info);
2802 VkDescriptorBufferInfo info =
2803 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2804 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2806 updateBuilder.writeSingle(*descriptorSet,
2807 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2808 inputBuffers[buffersNdx]->getType(), &info);
2812 updateBuilder.update(vk, device);
2814 const VkQueue queue = context.getUniversalQueue();
2815 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2816 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
2817 const deUint32 subgroupSize = getSubgroupSize(context);
2818 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
// One vertex (point) per pixel column of the maxWidth-wide target.
2819 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
2820 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2821 unsigned totalIterations = 0u;
2822 unsigned failedIterations = 0u;
2823 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
// Vertex data: point ndx is placed at the horizontal centre of pixel ndx
// (left edge + half a pixel, with x starting at -1 and stepping by 2/maxWidth).
2826 const Allocation& alloc = vertexBuffer.getAllocation();
2827 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2828 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
2829 float leftHandPosition = -1.0f;
2831 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2833 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2834 leftHandPosition += pixelSize;
2837 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2838 flushAlloc(vk, device, alloc);
2841 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2842 const VkViewport viewport = makeViewport(maxWidth, 1u);
2843 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
2844 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2845 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2846 const VkDeviceSize vertexBufferOffset = 0u;
// One command-buffer submission per tested width; maxWidth itself is excluded by the `<`.
2848 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
// Extra inputs are re-initialised before every iteration.
2852 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2854 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2855 initializeMemory(context, alloc, extraData[ndx]);
2858 beginCommandBuffer(vk, *cmdBuffer);
2860 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2862 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2864 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2866 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2868 if (extraDataCount > 0)
2870 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2871 &descriptorSet.get(), 0u, DE_NULL);
2874 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2876 vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2878 endRenderPass(vk, *cmdBuffer);
2880 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2882 endCommandBuffer(vk, *cmdBuffer);
2884 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
// Read back the copied pixels and hand them to the verifier.
2888 const Allocation& allocResult = imageBufferResult.getAllocation();
2889 invalidateAlloc(vk, device, allocResult);
2891 std::vector<const void*> datas;
2892 datas.push_back(allocResult.getHostPtr());
2893 if (!checkResult(internalData, datas, width, subgroupSize))
2898 if (0 < failedIterations)
// Clamp to 0 so the logged count can never underflow.
2900 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2902 context.getTestContext().getLog()
2903 << TestLog::Message << valuesPassed << " / "
2904 << totalIterations << " values passed" << TestLog::EndMessage;
2906 return tcu::TestStatus::fail("Failed!");
2909 return tcu::TestStatus::pass("OK");
// Runs the multi-stage subgroup test with default pipeline settings.
//
// Thin wrapper: forwards context, format, extraData, extraDataCount,
// internalData, checkResult and shaderStage unchanged to
// allStagesRequiredSubgroupSize, passing 0u for each of the five per-stage
// shader stage create flags and DE_NULL for the required subgroup size array.
// Returns whatever allStagesRequiredSubgroupSize returns.
2912 tcu::TestStatus vkt::subgroups::allStages(
2913 Context& context, VkFormat format, SSBOData* extraData,
2914 deUint32 extraDataCount, const void* internalData,
2915 const VerificationFunctor& checkResult,
2916 const vk::VkShaderStageFlags shaderStage)
2918 return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
2919 0u, 0u, 0u, 0u, 0u, DE_NULL);
// Runs a subgroup test through a graphics pipeline and validates the output
// of every stage selected in shaderStageTested.
//
// Parameters:
//   context                   - test context supplying device, queue and shader binaries.
//   format                    - format of the color attachment and of the per-stage result elements.
//   extraDatas                - array of extraDatasCount additional inputs; each entry supplies its
//                               own binding, stages, format, layout and element count.
//   internalData              - opaque pointer handed through to checkResult.
//   checkResult               - verification functor, invoked per tested stage for every width.
//   shaderStageTested         - mask of the stages whose results are verified.
//   *ShaderStageCreateFlags   - per-stage create flags forwarded to makeGraphicsPipeline.
//   requiredSubgroupSize      - five-entry array forwarded to makeGraphicsPipeline
//                               (allStages passes DE_NULL here).
//
// Returns TestStatus::fail("Failed!") after logging the pass count when
// failedIterations is non-zero, otherwise TestStatus::pass("OK").
2922 tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize(
2923 Context& context, VkFormat format, SSBOData* extraDatas,
2924 deUint32 extraDatasCount, const void* internalData,
2925 const VerificationFunctor& checkResult,
2926 const VkShaderStageFlags shaderStageTested,
2927 const deUint32 vertexShaderStageCreateFlags,
2928 const deUint32 tessellationControlShaderStageCreateFlags,
2929 const deUint32 tessellationEvalShaderStageCreateFlags,
2930 const deUint32 geometryShaderStageCreateFlags,
2931 const deUint32 fragmentShaderStageCreateFlags,
2932 const deUint32 requiredSubgroupSize[5])
2934 const DeviceInterface& vk = context.getDeviceInterface();
2935 const VkDevice device = context.getDevice();
2936 const deUint32 maxWidth = getMaxWidth();
2937 vector<VkShaderStageFlagBits> stagesVector;
2938 VkShaderStageFlags shaderStageRequired = (VkShaderStageFlags)0ull;
2940 Move<VkShaderModule> vertexShaderModule;
2941 Move<VkShaderModule> teCtrlShaderModule;
2942 Move<VkShaderModule> teEvalShaderModule;
2943 Move<VkShaderModule> geometryShaderModule;
2944 Move<VkShaderModule> fragmentShaderModule;
// Collect the tested vertex/tessellation/geometry stages in stagesVector and
// record in shaderStageRequired the companion stages that are not under test:
// vertex for every other stage, and the opposite tessellation stage for
// either tessellation stage.
2946 if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
2948 stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
2950 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
2952 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
2953 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
2954 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
2956 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2958 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
2959 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
2960 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2962 if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
2964 stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
2965 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
2966 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
// The fragment stage is verified from the color attachment further below, so
// it only contributes its vertex-stage requirement here.
2968 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
2970 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
2971 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
2974 const deUint32 stagesCount = static_cast<deUint32>(stagesVector.size());
// A stage that is required but not tested uses its "_noSubgroup" shader binary.
2975 const string vert = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
2976 const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
2977 const string tese = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";
// From here on shaderStageRequired is the union of tested and companion stages.
2979 shaderStageRequired = shaderStageTested | shaderStageRequired;
// The vertex module is created unconditionally; both tessellation modules are
// created together, keyed on the tessellation control bit.
2981 vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
2982 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
2984 teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
2985 teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
2987 if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
2989 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2991 // tessellation shaders output line primitives
2992 geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
2996 // otherwise points are processed by geometry shader
2997 geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
3000 if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
3001 fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
// inputBuffers layout: indices [0, stagesCount) are the per-stage result
// buffers, indices [stagesCount, stagesCount + extraDatasCount) are the
// caller's extra inputs.
3003 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
3005 DescriptorSetLayoutBuilder layoutBuilder;
3006 // The implicit result SSBO we use to store our outputs from the shader
3007 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
// The tessellation evaluation result buffer holds twice as many elements as
// the other stages' buffers.
3009 const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
3010 const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
3011 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3013 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
// Create and fill the extra inputs, bound at the binding and stages each
// SSBOData entry specifies.
3016 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3018 const deUint32 datasNdx = ndx - stagesCount;
3019 if (extraDatas[datasNdx].isImage)
3021 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
3025 const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
3026 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3029 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3030 initializeMemory(context, alloc, extraDatas[datasNdx]);
3032 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
3033 extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
3036 const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3038 const Unique<VkPipelineLayout> pipelineLayout(
3039 makePipelineLayout(vk, device, *descriptorSetLayout));
3041 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
// Patch-list topology when a tessellation control stage is present, point
// list otherwise.
3042 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3043 shaderStageRequired,
3044 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
3046 (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3047 DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
3048 vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
3049 geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
3051 Move <VkDescriptorPool> descriptorPool;
3052 Move <VkDescriptorSet> descriptorSet;
// The descriptor pool and set are only built when at least one buffer or
// image exists.
3054 if (inputBuffers.size() > 0)
3056 DescriptorPoolBuilder poolBuilder;
3058 for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
3060 poolBuilder.addType(inputBuffers[ndx]->getType());
3063 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3065 // Create descriptor set
3066 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3068 DescriptorSetUpdateBuilder updateBuilder;
// Result buffers are written at getResultBinding(stage); extra inputs at the
// binding stored in their SSBOData entry.
3070 for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
3073 if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
3074 else binding = extraDatas[ndx -stagesCount].binding;
3076 if (inputBuffers[ndx]->isImage())
3078 VkDescriptorImageInfo info =
3079 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
3080 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3082 updateBuilder.writeSingle( *descriptorSet,
3083 DescriptorSetUpdateBuilder::Location::binding(binding),
3084 inputBuffers[ndx]->getType(), &info);
3088 VkDescriptorBufferInfo info =
3089 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
3090 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
3092 updateBuilder.writeSingle( *descriptorSet,
3093 DescriptorSetUpdateBuilder::Location::binding(binding),
3094 inputBuffers[ndx]->getType(), &info);
3098 updateBuilder.update(vk, device);
3102 const VkQueue queue = context.getUniversalQueue();
3103 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3104 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3105 const deUint32 subgroupSize = getSubgroupSize(context);
3106 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3107 unsigned totalIterations = 0u;
3108 unsigned failedIterations = 0u;
// The color attachment is maxWidth x 1; imageBufferResult receives its
// contents for the fragment-stage check.
3109 Image resultImage (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3110 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
3111 const VkViewport viewport = makeViewport(maxWidth, 1u);
3112 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
3113 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3114 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3115 const VkImageSubresourceRange subresourceRange =
3117 VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
3118 0u, //deUint32 baseMipLevel
3119 1u, //deUint32 levelCount
3120 0u, //deUint32 baseArrayLayer
3121 1u //deUint32 layerCount
// Layout transition UNDEFINED -> COLOR_ATTACHMENT_OPTIMAL for the result
// image; it is recorded at the start of every iteration's command buffer.
3124 const VkImageMemoryBarrier colorAttachmentBarrier = makeImageMemoryBarrier(
3125 (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3126 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
3127 resultImage.getImage(), subresourceRange);
// One iteration per width produced by getNextWidth, stopping before maxWidth;
// each iteration draws `width` vertices.
3129 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
// Restore the extra inputs to their initial contents before each draw.
3131 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3134 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3135 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
3140 beginCommandBuffer(vk, *cmdBuffer);
3142 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
3144 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3146 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3148 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3150 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3152 if (stagesCount + extraDatasCount > 0)
3153 vk.cmdBindDescriptorSets(*cmdBuffer,
3154 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3155 &descriptorSet.get(), 0u, DE_NULL);
3157 vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
3159 endRenderPass(vk, *cmdBuffer);
// Only a width x 1 region of the attachment is copied back here.
3161 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3163 endCommandBuffer(vk, *cmdBuffer);
3165 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
// Verify every stage in stagesVector from its own result buffer.
3167 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3169 std::vector<const void*> datas;
3170 if (!inputBuffers[ndx]->isImage())
3172 const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
3173 invalidateAlloc(vk, device, resultAlloc);
3174 // we always have our result data first
3175 datas.push_back(resultAlloc.getHostPtr());
// Append the non-image extra inputs whose stage mask includes this stage.
3178 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3180 const deUint32 datasNdx = index - stagesCount;
3181 if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3183 const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
3184 invalidateAlloc(vk, device, resultAlloc);
3185 // we always have our result data first
3186 datas.push_back(resultAlloc.getHostPtr());
3190 // Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
3191 const bool multiCall = ( stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT ||
3192 stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
3193 stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT ||
3194 stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT );
// Tessellation evaluation is checked against twice the drawn width, matching
// the doubled size of its result buffer.
3195 const deUint32 usedWidth = ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);
3197 if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
// The fragment stage is verified from the color attachment readback, with
// multiCall passed as false.
3200 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3202 std::vector<const void*> datas;
3203 const Allocation& resultAlloc = imageBufferResult.getAllocation();
3204 invalidateAlloc(vk, device, resultAlloc);
3206 // we always have our result data first
3207 datas.push_back(resultAlloc.getHostPtr());
3209 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3211 const deUint32 datasNdx = index - stagesCount;
3212 if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
3214 const Allocation& alloc = inputBuffers[index]->getAllocation();
3215 invalidateAlloc(vk, device, alloc);
3216 // we always have our result data first
3217 datas.push_back(alloc.getHostPtr());
3221 if (!checkResult(internalData, datas, width, subgroupSize, false))
// Reset the command buffer after each submission; it is recorded again from
// beginCommandBuffer for the next width.
3225 vk.resetCommandBuffer(*cmdBuffer, 0);
// Report how many iterations passed before failing the test.
3228 if (0 < failedIterations)
3230 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3232 context.getTestContext().getLog()
3233 << TestLog::Message << valuesPassed << " / "
3234 << totalIterations << " values passed" << TestLog::EndMessage;
3236 return tcu::TestStatus::fail("Failed!");
3240 return tcu::TestStatus::pass("OK");
// Vertex-stage framebuffer test entry point.
//
// Delegates to makeVertexFrameBufferTestRequiredSubgroupSize, forwarding
// context, format, extraData, extraDataCount, internalData and checkResult,
// and returns that function's status.
// checkResult receives internalData, the list of result pointers, the drawn
// width and the subgroup size.
3243 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
3244 SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
3245 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
3247 return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult,
// Vertex-stage framebuffer test with optional subgroup size control.
//
// Builds a vertex + fragment point-list pipeline from the "vert" and
// "fragment" binaries, draws `width` points for each width below
// getMaxWidth(), copies the maxWidth x 1 color attachment into a buffer and
// hands the buffer's host pointer to checkResult.
//
// Parameters:
//   context                      - test context supplying device, queue and shader binaries.
//   format                       - color attachment format; also passed to makeGraphicsPipeline.
//   extraData / extraDataCount   - additional inputs, bound to the vertex stage at binding
//                                  numbers equal to their array index.
//   internalData                 - opaque pointer handed through to checkResult.
//   checkResult                  - verification callback (internalData, datas, width, subgroupSize).
//   vertexShaderStageCreateFlags - create flags for the vertex stage.
//   requiredSubgroupSize         - placed in slot 0 of the five-entry size array; the array is
//                                  only passed to makeGraphicsPipeline when this is non-zero.
//
// Returns TestStatus::fail("Failed!") after logging the pass count when
// failedIterations is non-zero, otherwise TestStatus::pass("OK").
3251 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(Context& context, vk::VkFormat format,
3252 SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
3253 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
3254 const deUint32 vertexShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
3256 const DeviceInterface& vk = context.getDeviceInterface();
3257 const VkDevice device = context.getDevice();
3258 const VkQueue queue = context.getUniversalQueue();
3259 const deUint32 maxWidth = getMaxWidth();
3260 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3261 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
3262 DescriptorSetLayoutBuilder layoutBuilder;
3263 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3264 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3265 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
// Vertex input: one tcu::Vec4 per vertex, advanced per vertex, read as
// R32G32B32A32_SFLOAT.
3267 const VkVertexInputBindingDescription vertexInputBinding =
3270 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
3271 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
3274 const VkVertexInputAttributeDescription vertexInputAttribute =
3278 VK_FORMAT_R32G32B32A32_SFLOAT,
// Create each extra input as an Image or as a Buffer with uniform-buffer
// usage, and fill it with its initial contents.
3282 for (deUint32 i = 0u; i < extraDataCount; i++)
3284 if (extraData[i].isImage)
3286 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
3290 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
3291 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3293 const Allocation& alloc = inputBuffers[i]->getAllocation();
3294 initializeMemory(context, alloc, extraData[i]);
// Every extra input is visible to the vertex stage only.
3297 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3298 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
3300 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
3302 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
// Slot 0 of the size array carries the vertex stage's required subgroup size.
3304 const deUint32 requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
3305 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
3306 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3307 *vertexShaderModule, *fragmentShaderModule,
3308 DE_NULL, DE_NULL, DE_NULL,
3309 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3310 &vertexInputBinding, &vertexInputAttribute, true, format,
3311 vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
3312 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3313 DescriptorPoolBuilder poolBuilder;
3314 DescriptorSetUpdateBuilder updateBuilder;
3317 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3318 poolBuilder.addType(inputBuffers[ndx]->getType());
3320 Move <VkDescriptorPool> descriptorPool;
3321 Move <VkDescriptorSet> descriptorSet;
// The pool and set are only created when there are extra inputs to bind.
3323 if (extraDataCount > 0)
3325 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3326 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
// NOTE(review): the same allocations were already initialized when the inputs
// were created above, so this pass looks redundant - confirm before removing.
3329 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3331 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3332 initializeMemory(context, alloc, extraData[ndx]);
// Descriptor binding numbers equal the input's index in inputBuffers.
3335 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
3337 if (inputBuffers[buffersNdx]->isImage())
3339 VkDescriptorImageInfo info =
3340 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
3341 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3343 updateBuilder.writeSingle(*descriptorSet,
3344 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3345 inputBuffers[buffersNdx]->getType(), &info);
3349 VkDescriptorBufferInfo info =
3350 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
3351 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3353 updateBuilder.writeSingle(*descriptorSet,
3354 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3355 inputBuffers[buffersNdx]->getType(), &info);
3358 updateBuilder.update(vk, device);
3360 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3362 const deUint32 subgroupSize = getSubgroupSize(context);
3364 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
// The vertex buffer holds maxWidth positions, one tcu::Vec4 each.
3366 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
3367 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3369 unsigned totalIterations = 0u;
3370 unsigned failedIterations = 0u;
3372 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
// Fill the vertex buffer: the x coordinate of vertex ndx is the centre of the
// ndx-th of maxWidth equal slices of [-1, 1]; the other components stay 1.0.
3375 const Allocation& alloc = vertexBuffer.getAllocation();
3376 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3377 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
3378 float leftHandPosition = -1.0f;
3380 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3382 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3383 leftHandPosition += pixelSize;
3386 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3387 flushAlloc(vk, device, alloc);
3390 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3391 const VkViewport viewport = makeViewport(maxWidth, 1u);
3392 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
3393 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3394 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3395 const VkDeviceSize vertexBufferOffset = 0u;
// One iteration per width produced by getNextWidth, stopping before maxWidth.
3397 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
// Restore the extra inputs to their initial contents before each draw.
3401 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3403 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3404 initializeMemory(context, alloc, extraData[ndx]);
3407 beginCommandBuffer(vk, *cmdBuffer);
3409 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3411 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3413 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3415 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3417 if (extraDataCount > 0)
3419 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3420 &descriptorSet.get(), 0u, DE_NULL);
3423 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
// Draw the first `width` vertices of the vertex buffer.
3425 vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3427 endRenderPass(vk, *cmdBuffer);
// The whole maxWidth x 1 attachment is copied back, regardless of `width`.
3429 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3431 endCommandBuffer(vk, *cmdBuffer);
3433 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
// Make the readback visible to the host, then verify it; the image data is
// the only entry in datas.
3437 const Allocation& allocResult = imageBufferResult.getAllocation();
3438 invalidateAlloc(vk, device, allocResult);
3440 std::vector<const void*> datas;
3441 datas.push_back(allocResult.getHostPtr());
3442 if (!checkResult(internalData, datas, width, subgroupSize))
// Report how many iterations passed before failing the test.
3447 if (0 < failedIterations)
3449 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3451 context.getTestContext().getLog()
3452 << TestLog::Message << valuesPassed << " / "
3453 << totalIterations << " values passed" << TestLog::EndMessage;
3455 return tcu::TestStatus::fail("Failed!");
3458 return tcu::TestStatus::pass("OK");
// Fragment-stage framebuffer test entry point.
//
// Delegates to makeFragmentFrameBufferTestRequiredSubgroupSize, forwarding
// context, format, extraDatas, extraDatasCount, internalData and checkResult,
// and returns that function's status.
// checkResult receives internalData, the list of result pointers, the
// framebuffer width and height, and the subgroup size.
3461 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest(
3462 Context& context, VkFormat format, SSBOData* extraDatas,
3463 deUint32 extraDatasCount, const void* internalData,
3464 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width,
3465 deUint32 height, deUint32 subgroupSize))
3467 return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult,
// Fragment-stage framebuffer test with optional subgroup size control.
//
// Builds a vertex + fragment triangle-strip pipeline from the "vert" and
// "fragment" binaries. For every width x height combination of powers of two
// from 8 up to subgroupSize it renders four vertices into a fresh
// width x height color attachment, copies the attachment into a buffer and
// hands the buffer's host pointer to checkResult.
//
// Parameters:
//   context                        - test context supplying device, queue and shader binaries.
//   format                         - color attachment format; its byte size sizes the readback.
//   extraDatas / extraDatasCount   - additional inputs, bound to the fragment stage at binding
//                                    numbers equal to their array index.
//   internalData                   - opaque pointer handed through to checkResult.
//   checkResult                    - verification callback
//                                    (internalData, datas, width, height, subgroupSize).
//   fragmentShaderStageCreateFlags - create flags for the fragment stage.
//   requiredSubgroupSize           - placed in slot 4 of the five-entry size array; the array is
//                                    only passed to makeGraphicsPipeline when this is non-zero.
//
// Returns TestStatus::fail("Failed!") after logging the pass count when
// failedIterations is non-zero, otherwise TestStatus::pass("OK").
// NOTE(review): when subgroupSize is below 8 neither loop condition holds, so
// nothing is rendered and the function reports pass - confirm this is intended.
3471 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(
3472 Context& context, VkFormat format, SSBOData* extraDatas,
3473 deUint32 extraDatasCount, const void* internalData,
3474 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width,
3475 deUint32 height, deUint32 subgroupSize),
3476 const deUint32 fragmentShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
3478 const DeviceInterface& vk = context.getDeviceInterface();
3479 const VkDevice device = context.getDevice();
3480 const VkQueue queue = context.getUniversalQueue();
3481 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3482 const Unique<VkShaderModule> vertexShaderModule (createShaderModule
3483 (vk, device, context.getBinaryCollection().get("vert"), 0u));
3484 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule
3485 (vk, device, context.getBinaryCollection().get("fragment"), 0u));
3487 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
// Create each extra input as an Image or as a Buffer with uniform-buffer
// usage, and fill it with its initial contents.
3489 for (deUint32 i = 0; i < extraDatasCount; i++)
3491 if (extraDatas[i].isImage)
3493 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3494 static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3498 vk::VkDeviceSize size =
3499 getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3500 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3503 const Allocation& alloc = inputBuffers[i]->getAllocation();
3504 initializeMemory(context, alloc, extraDatas[i]);
3507 DescriptorSetLayoutBuilder layoutBuilder;
// Every extra input is visible to the fragment stage only.
3509 for (deUint32 i = 0; i < extraDatasCount; i++)
3511 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
3512 VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3515 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3516 layoutBuilder.build(vk, device));
3518 const Unique<VkPipelineLayout> pipelineLayout(
3519 makePipelineLayout(vk, device, *descriptorSetLayout));
3521 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
// Slot 4 of the size array carries the fragment stage's required subgroup size.
3523 const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3524 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3525 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3526 *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3527 DE_NULL, DE_NULL, true, VK_FORMAT_R32G32B32A32_SFLOAT,
3528 0u, 0u, 0u, 0u, fragmentShaderStageCreateFlags, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3530 DescriptorPoolBuilder poolBuilder;
3532 // To stop validation complaining, always add at least one type to pool.
3533 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3534 for (deUint32 i = 0; i < extraDatasCount; i++)
3536 poolBuilder.addType(inputBuffers[i]->getType());
3539 Move<VkDescriptorPool> descriptorPool;
3540 // Create descriptor set
3541 Move<VkDescriptorSet> descriptorSet;
// The pool and set are only created when there are extra inputs to bind.
3543 if (extraDatasCount > 0)
3545 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3547 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3550 DescriptorSetUpdateBuilder updateBuilder;
// Descriptor binding numbers equal the input's index in inputBuffers.
3552 for (deUint32 i = 0; i < extraDatasCount; i++)
3554 if (inputBuffers[i]->isImage())
3556 VkDescriptorImageInfo info =
3557 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3558 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3560 updateBuilder.writeSingle(*descriptorSet,
3561 DescriptorSetUpdateBuilder::Location::binding(i),
3562 inputBuffers[i]->getType(), &info);
3566 VkDescriptorBufferInfo info =
3567 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
3568 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3570 updateBuilder.writeSingle(*descriptorSet,
3571 DescriptorSetUpdateBuilder::Location::binding(i),
3572 inputBuffers[i]->getType(), &info);
3576 if (extraDatasCount > 0)
3577 updateBuilder.update(vk, device);
3579 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3581 const deUint32 subgroupSize = getSubgroupSize(context);
3583 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3585 unsigned totalIterations = 0;
3586 unsigned failedIterations = 0;
// Framebuffer dimensions step through powers of two from 8 up to
// subgroupSize, independently on each axis.
3588 for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3590 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
// Restore the extra inputs to their initial contents before each draw.
3595 for (deUint32 i = 0; i < extraDatasCount; i++)
3597 const Allocation& alloc = inputBuffers[i]->getAllocation();
3598 initializeMemory(context, alloc, extraDatas[i]);
// The attachment, readback buffer and framebuffer are recreated for every
// width x height combination.
3601 VkDeviceSize formatSize = getFormatSizeInBytes(format);
3602 const VkDeviceSize resultImageSizeInBytes =
3603 width * height * formatSize;
3605 Image resultImage(context, width, height, format,
3606 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
3607 VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
// Copy destination: a buffer-usage flag belongs here, matching the other
// readback buffers in this file, rather than the image-usage enumerant.
3609 Buffer resultBuffer(context, resultImageSizeInBytes,
3610 VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3612 const Unique<VkFramebuffer> framebuffer(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3614 beginCommandBuffer(vk, *cmdBuffer);
3616 VkViewport viewport = makeViewport(width, height);
3619 *cmdBuffer, 0, 1, &viewport);
3621 VkRect2D scissor = {{0, 0}, {width, height}};
3624 *cmdBuffer, 0, 1, &scissor);
3626 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3629 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3631 if (extraDatasCount > 0)
3633 vk.cmdBindDescriptorSets(*cmdBuffer,
3634 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3635 &descriptorSet.get(), 0u, DE_NULL);
// Four vertices, assembled as a triangle strip by the pipeline created above.
3638 vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3640 endRenderPass(vk, *cmdBuffer);
3642 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3644 endCommandBuffer(vk, *cmdBuffer);
3646 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
// Make the readback visible to the host, then verify it; the image data is
// the only entry in datas.
3648 std::vector<const void*> datas;
3650 const Allocation& resultAlloc = resultBuffer.getAllocation();
3651 invalidateAlloc(vk, device, resultAlloc);
3653 // we always have our result data first
3654 datas.push_back(resultAlloc.getHostPtr());
3657 if (!checkResult(internalData, datas, width, height, subgroupSize))
// Reset the command buffer after each submission; it is recorded again from
// beginCommandBuffer for the next combination.
3662 vk.resetCommandBuffer(*cmdBuffer, 0);
// Report how many iterations passed before failing the test.
3666 if (0 < failedIterations)
3668 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3670 context.getTestContext().getLog()
3671 << TestLog::Message << valuesPassed << " / "
3672 << totalIterations << " values passed" << TestLog::EndMessage;
3674 return tcu::TestStatus::fail("Failed!");
3677 return tcu::TestStatus::pass("OK");
// Creates a compute pipeline for shaderModule (entry point "main") whose
// workgroup size is supplied through specialization constants.
//
// Parameters:
//   context                  - supplies the device interface and device used for creation.
//   pipelineLayout           - layout of the pipeline.
//   shaderModule             - compute shader module.
//   pipelineShaderStageFlags - VkPipelineShaderStageCreateFlags for the compute stage.
//   pipelineCreateFlags      - VkPipelineCreateFlags for the pipeline.
//   basePipelineHandle       - base pipeline handle placed in the create info.
//   localSizeX/Y/Z           - values for specialization constants 0, 1 and 2.
//   requiredSubgroupSize     - when non-zero, a required-subgroup-size structure carrying this
//                              value is chained into the shader stage's pNext.
//
// Returns the pipeline produced by createComputePipeline.
3680 Move<VkPipeline> makeComputePipeline(Context& context,
3681 const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
3682 const deUint32 pipelineShaderStageFlags, const deUint32 pipelineCreateFlags, VkPipeline basePipelineHandle,
3683 deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ, deUint32 requiredSubgroupSize)
3685 const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
// Map entries: constant ID i reads one deUint32 at byte offset
// i * sizeof(deUint32) of localSize.
3687 const vk::VkSpecializationMapEntry entries[3] =
3689 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
3690 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
3691 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3694 const vk::VkSpecializationInfo info =
3696 /* mapEntryCount = */ 3,
3697 /* pMapEntries = */ entries,
3698 /* dataSize = */ sizeof(localSize),
3699 /* pData = */ localSize
// Always filled in, but only referenced by the stage below when
// requiredSubgroupSize is non-zero.
3702 const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo =
3704 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
3705 DE_NULL, // void* pNext;
3706 requiredSubgroupSize // uint32_t requiredSubgroupSize;
3709 const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
3711 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3712 (requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL), // const void* pNext;
3713 pipelineShaderStageFlags, // VkPipelineShaderStageCreateFlags flags;
3714 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
3715 shaderModule, // VkShaderModule module;
3716 "main", // const char* pName;
3717 &info, // const VkSpecializationInfo* pSpecializationInfo;
3720 const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
3722 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
3723 DE_NULL, // const void* pNext;
3724 pipelineCreateFlags, // VkPipelineCreateFlags flags;
3725 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
3726 pipelineLayout, // VkPipelineLayout layout;
3727 basePipelineHandle, // VkPipeline basePipelineHandle;
3728 -1, // deInt32 basePipelineIndex;
// DE_NULL is passed in the argument position between the device and the
// create info.
3731 return createComputePipeline(context.getDeviceInterface(),
3732 context.getDevice(), DE_NULL, &pipelineCreateInfo);
// Runs the "comp" compute shader once for each tested local size and validates
// every run through the checkResult callback.
//
// context                        - test context supplying the device, queue, binaries and log.
// format                         - its byte size (getFormatSizeInBytes) scales the result buffer.
// inputs / inputsCount           - descriptions of the extra input resources; input i is bound at
//                                  binding i + 1 (binding 0 is the result buffer).
// internalData                   - opaque pointer forwarded untouched to checkResult.
// checkResult                    - receives the host pointers (result first, then every non-image
//                                  input), numWorkgroups, the local size that was run and subgroupSize.
// pipelineShaderStageCreateFlags - forwarded to makeComputePipeline as the shader stage flags.
// numWorkgroups                  - dispatch dimensions used for every run.
// isRequiredSubgroupSize         - when set, subgroupSize is passed to pipeline creation as the
//                                  required subgroup size; otherwise 0 is passed.
// localSizesToTest / ...Count    - table of local sizes; the loops below stop at
//                                  localSizesToTestCount - 1, so the last row is never run.
//
// Returns TestStatus::fail("Failed!") when failedIterations is non-zero, pass("OK") otherwise.
3735 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize(
3736 Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount, const void* internalData,
3737 bool (*checkResult)(const void* internalData, std::vector<const void*> datas,
3738 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
3739 deUint32 subgroupSize),
3740 const deUint32 pipelineShaderStageCreateFlags, const deUint32 numWorkgroups[3],
3741 const deBool isRequiredSubgroupSize, const deUint32 subgroupSize, const deUint32 localSizesToTest[][3], const deUint32 localSizesToTestCount)
3743 const DeviceInterface& vk = context.getDeviceInterface();
3744 const VkDevice device = context.getDevice();
3745 const VkQueue queue = context.getUniversalQueue();
3746 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3747 VkDeviceSize elementSize = getFormatSizeInBytes(format);
3749 VkDeviceSize maxSubgroupSize = maxSupportedSubgroupSize();
// With a required subgroup size in play, query the subgroup-size-control properties
// and raise maxSubgroupSize to at least the device-reported maximum.
3751 if (isRequiredSubgroupSize)
3753 VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
3754 subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
3755 subgroupSizeControlProperties.pNext = DE_NULL;
3757 VkPhysicalDeviceProperties2 properties2;
3758 properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
3759 properties2.pNext = &subgroupSizeControlProperties;
3760 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties2);
3761 maxSubgroupSize = deMax32(subgroupSizeControlProperties.maxSubgroupSize, static_cast<deUint32>(maxSubgroupSize));
// Result buffer element count is derived from maxSubgroupSize, then converted to
// bytes using the element size of 'format'.
3764 const VkDeviceSize resultBufferSize = maxSubgroupSize *
3768 const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
3770 Buffer resultBuffer(
3771 context, resultBufferSizeInBytes);
3773 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
// Create one Image or Buffer per input description and initialise its memory.
3775 for (deUint32 i = 0; i < inputsCount; i++)
3777 if (inputs[i].isImage)
3779 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3780 static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
// Buffer inputs are sized as per-element byte size (format + layout) times numElements.
3784 vk::VkDeviceSize size =
3785 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3786 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3789 const Allocation& alloc = inputBuffers[i]->getAllocation();
3790 initializeMemory(context, alloc, inputs[i]);
// Descriptor set layout: result buffer first, then the inputs in order, all
// visible to the compute stage only.
3793 DescriptorSetLayoutBuilder layoutBuilder;
3794 layoutBuilder.addBinding(
3795 resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3797 for (deUint32 i = 0; i < inputsCount; i++)
3799 layoutBuilder.addBinding(
3800 inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3803 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3804 layoutBuilder.build(vk, device));
// The shader is looked up under the name "comp" in the test's binary collection.
3806 const Unique<VkShaderModule> shaderModule(
3807 createShaderModule(vk, device,
3808 context.getBinaryCollection().get("comp"), 0u));
3809 const Unique<VkPipelineLayout> pipelineLayout(
3810 makePipelineLayout(vk, device, *descriptorSetLayout));
// Descriptor pool mirrors the layout: one descriptor for the result plus one per input.
3812 DescriptorPoolBuilder poolBuilder;
3814 poolBuilder.addType(resultBuffer.getType());
3816 for (deUint32 i = 0; i < inputsCount; i++)
3818 poolBuilder.addType(inputBuffers[i]->getType());
3821 const Unique<VkDescriptorPool> descriptorPool(
3822 poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3824 // Create descriptor set
3825 const Unique<VkDescriptorSet> descriptorSet(
3826 makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
3828 DescriptorSetUpdateBuilder updateBuilder;
// Binding 0: the whole result buffer, written as a storage buffer.
3830 const VkDescriptorBufferInfo resultDescriptorInfo =
3831 makeDescriptorBufferInfo(
3832 resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
3834 updateBuilder.writeSingle(*descriptorSet,
3835 DescriptorSetUpdateBuilder::Location::binding(0u),
3836 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
// Bindings 1..inputsCount: each input, using the descriptor type the resource reports.
3838 for (deUint32 i = 0; i < inputsCount; i++)
3840 if (inputBuffers[i]->isImage())
3842 VkDescriptorImageInfo info =
3843 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3844 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3846 updateBuilder.writeSingle(*descriptorSet,
3847 DescriptorSetUpdateBuilder::Location::binding(i + 1),
3848 inputBuffers[i]->getType(), &info);
// Buffer inputs expose the same byte range that was computed when the buffer was created.
3852 vk::VkDeviceSize size =
3853 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3854 VkDescriptorBufferInfo info =
3855 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
3857 updateBuilder.writeSingle(*descriptorSet,
3858 DescriptorSetUpdateBuilder::Location::binding(i + 1),
3859 inputBuffers[i]->getType(), &info);
3863 updateBuilder.update(vk, device);
3865 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3867 unsigned totalIterations = 0;
3868 unsigned failedIterations = 0;
3870 const Unique<VkCommandBuffer> cmdBuffer(
3871 makeCommandBuffer(context, *cmdPool));
// One pipeline per tested local size (the last table row is excluded).
// NOTE(review): raw new[] - confirm the array is released on every exit path, including
// exceptions thrown by the helpers below. Also, localSizesToTestCount - 1 wraps around
// when the count is 0, and a count of 1 yields an empty array while index 0 is still
// used - TODO confirm callers always pass at least 2.
3873 Move<VkPipeline> *pipelines = new Move<VkPipeline>[localSizesToTestCount - 1];
// Base pipeline for the first local size; ALLOW_DERIVATIVES lets the remaining
// pipelines be created as derivatives of it.
3875 makeComputePipeline(context, *pipelineLayout, *shaderModule,
3876 pipelineShaderStageCreateFlags, VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT, (VkPipeline) DE_NULL,
3877 localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2],
3878 isRequiredSubgroupSize ? subgroupSize : 0u);
// Remaining local sizes: derivative pipelines based on pipelines[0].
3880 for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
3882 const deUint32 nextX = localSizesToTest[index][0];
3883 const deUint32 nextY = localSizesToTest[index][1];
3884 const deUint32 nextZ = localSizesToTest[index][2];
3887 makeComputePipeline(context, *pipelineLayout, *shaderModule,
3888 pipelineShaderStageCreateFlags, VK_PIPELINE_CREATE_DERIVATIVE_BIT, *pipelines[0],
3889 nextX, nextY, nextZ,
3890 isRequiredSubgroupSize ? subgroupSize : 0u);
// Record, submit and verify one dispatch per pipeline, reusing a single command buffer.
3893 for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
3896 // we are running one test
3899 beginCommandBuffer(vk, *cmdBuffer);
3901 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelines[index]);
3903 vk.cmdBindDescriptorSets(*cmdBuffer,
3904 VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
3905 &descriptorSet.get(), 0u, DE_NULL);
3907 vk.cmdDispatch(*cmdBuffer,numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
3909 endCommandBuffer(vk, *cmdBuffer);
// Blocks until the dispatch has finished, so the buffers can be read on the host.
3911 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
// Host pointers handed to checkResult: result buffer first, then non-image inputs.
3913 std::vector<const void*> datas;
3916 const Allocation& resultAlloc = resultBuffer.getAllocation();
3917 invalidateAlloc(vk, device, resultAlloc);
3919 // we always have our result data first
3920 datas.push_back(resultAlloc.getHostPtr());
3923 for (deUint32 i = 0; i < inputsCount; i++)
// Image inputs are skipped: only buffer-backed inputs are exposed to the checker.
3925 if (!inputBuffers[i]->isImage())
3927 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
3928 invalidateAlloc(vk, device, resultAlloc);
3930 // non-image inputs follow the result data, in input order
3931 datas.push_back(resultAlloc.getHostPtr());
// A false return marks this local size as failed (presumably feeding the
// failedIterations tally reported below - TODO confirm).
3935 if (!checkResult(internalData, datas, numWorkgroups, localSizesToTest[index], subgroupSize))
// Reset so the same command buffer can be re-recorded for the next local size.
3940 vk.resetCommandBuffer(*cmdBuffer, 0);
// Any failure: log "<passed> / <total> values passed" and fail the test.
3945 if (0 < failedIterations)
// Clamp to 0 rather than letting the unsigned subtraction wrap.
3947 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3949 context.getTestContext().getLog()
3950 << TestLog::Message << valuesPassed << " / "
3951 << totalIterations << " values passed" << TestLog::EndMessage;
3953 return tcu::TestStatus::fail("Failed!");
3956 return tcu::TestStatus::pass("OK");
// Convenience wrapper around makeComputeTestRequiredSubgroupSize that supplies a fixed
// 4x2x2 dispatch and a built-in table of local sizes.
//
// context, format, inputs, inputsCount, internalData and checkResult are forwarded unchanged.
// requiredSubgroupSize           - 0 means "no required size": the subgroup size is then taken
//                                  from getSubgroupSize(context) and pipelines are created
//                                  without a required subgroup size. Any other value is used
//                                  as the subgroup size and requested at pipeline creation.
// pipelineShaderStageCreateFlags - forwarded unchanged.
//
// Returns whatever makeComputeTestRequiredSubgroupSize returns.
3959 tcu::TestStatus vkt::subgroups::makeComputeTest(
3960 Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount, const void* internalData,
3961 bool (*checkResult)(const void* internalData, std::vector<const void*> datas,
3962 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
3963 deUint32 subgroupSize),
3964 deUint32 requiredSubgroupSize, const deUint32 pipelineShaderStageCreateFlags)
3966 const deUint32 numWorkgroups[3] = {4, 2, 2};
3967 deUint32 subgroupSize = requiredSubgroupSize;
// No explicit requirement: fall back to the subgroup size reported for this context.
3969 if(requiredSubgroupSize == 0)
3970 subgroupSize = vkt::subgroups::getSubgroupSize(context);
// Local sizes to run. The callee iterates over localSizesToTestCount - 1 rows only,
// so the final row is a placeholder that is never dispatched.
3972 const deUint32 localSizesToTestCount = 8;
3973 deUint32 localSizesToTest[localSizesToTestCount][3] =
3976 {subgroupSize, 1, 1},
3977 {1, subgroupSize, 1},
3978 {1, 1, subgroupSize},
3982 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
// isRequiredSubgroupSize is derived from whether the caller asked for a specific size.
3985 return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
3986 numWorkgroups, requiredSubgroupSize != 0u, subgroupSize, localSizesToTest, localSizesToTestCount);