1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2015 Mobica Ltd.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and/or associated documentation files (the
9 * "Materials"), to deal in the Materials without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Materials, and to
12 * permit persons to whom the Materials are furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice(s) and this permission notice shall be included
16 * in all copies or substantial portions of the Materials.
18 * The Materials are Confidential Information as defined by the
19 * Khronos Membership Agreement until designated non-confidential by Khronos,
20 * at which point this condition clause shall be removed.
22 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
26 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
27 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
28 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
32 * \brief Indirect Compute Dispatch tests
33 *//*--------------------------------------------------------------------*/
35 #include "vktComputeIndirectComputeDispatchTests.hpp"
36 #include "vktComputeTestsUtil.hpp"
44 #include "vkRefUtil.hpp"
45 #include "vktTestCase.hpp"
46 #include "vktTestCaseUtil.hpp"
47 #include "vkPlatform.hpp"
48 #include "vkPrograms.hpp"
49 #include "vkMemUtil.hpp"
50 #include "vkBuilderUtil.hpp"
51 #include "vkQueryUtil.hpp"
53 #include "tcuVector.hpp"
54 #include "tcuVectorUtil.hpp"
55 #include "tcuTestLog.hpp"
56 #include "tcuRGBA.hpp"
57 #include "tcuStringTemplate.hpp"
59 #include "deUniquePtr.hpp"
60 #include "deSharedPtr.hpp"
61 #include "deStringUtil.hpp"
62 #include "deArrayUtil.hpp"
64 #include "gluShaderUtil.hpp"
75 RESULT_BLOCK_BASE_SIZE = 4 * (int)sizeof(deUint32), // uvec3 + uint
76 RESULT_BLOCK_NUM_PASSED_OFFSET = 3 * (int)sizeof(deUint32),
77 INDIRECT_COMMAND_OFFSET = 3 * (int)sizeof(deUint32),
80 vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface& instance_interface,
81 const vk::VkPhysicalDevice physicalDevice,
82 const vk::VkDeviceSize baseSize)
84 // TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
85 vk::VkPhysicalDeviceProperties deviceProperties;
86 instance_interface.getPhysicalDeviceProperties(physicalDevice, &deviceProperties);
87 vk::VkDeviceSize alignment = deviceProperties.limits.minStorageBufferOffsetAlignment;
89 if (alignment == 0 || (baseSize % alignment == 0))
92 return (baseSize / alignment + 1)*alignment;
95 struct DispatchCommand
97 DispatchCommand (const deIntptr offset,
98 const tcu::UVec3& numWorkGroups)
100 , m_numWorkGroups (numWorkGroups) {}
103 tcu::UVec3 m_numWorkGroups;
106 typedef std::vector<DispatchCommand> DispatchCommandsVec;
108 struct DispatchCaseDesc
110 DispatchCaseDesc (const char* name,
111 const char* description,
112 const deUintptr bufferSize,
113 const tcu::UVec3 workGroupSize,
114 const DispatchCommandsVec& dispatchCommands)
116 , m_description (description)
117 , m_bufferSize (bufferSize)
118 , m_workGroupSize (workGroupSize)
119 , m_dispatchCommands (dispatchCommands) {}
122 const char* m_description;
123 const deUintptr m_bufferSize;
124 const tcu::UVec3 m_workGroupSize;
125 const DispatchCommandsVec m_dispatchCommands;
128 class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
131 IndirectDispatchInstanceBufferUpload (Context& context,
132 const std::string& name,
133 const deUintptr bufferSize,
134 const tcu::UVec3& workGroupSize,
135 const DispatchCommandsVec& dispatchCommands);
137 virtual ~IndirectDispatchInstanceBufferUpload (void) {}
139 virtual tcu::TestStatus iterate (void);
142 virtual void fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer,
143 const Buffer& indirectBuffer);
145 deBool verifyResultBuffer (const Buffer& resultBuffer,
146 const vk::VkDeviceSize resultBlockSize,
147 const vk::VkDeviceSize resultBufferSize) const;
150 const std::string m_name;
152 const vk::DeviceInterface& m_device_interface;
153 const vk::VkDevice m_device;
155 const vk::VkQueue m_queue;
156 const deUint32 m_queueFamilyIndex;
158 const deUintptr m_bufferSize;
159 const tcu::UVec3 m_workGroupSize;
160 const DispatchCommandsVec m_dispatchCommands;
162 vk::Allocator& m_allocator;
165 IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
166 IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
169 IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context& context,
170 const std::string& name,
171 const deUintptr bufferSize,
172 const tcu::UVec3& workGroupSize,
173 const DispatchCommandsVec& dispatchCommands)
174 : vkt::TestInstance (context)
175 , m_context (context)
177 , m_device_interface (context.getDeviceInterface())
178 , m_device (context.getDevice())
179 , m_queue (context.getUniversalQueue())
180 , m_queueFamilyIndex (context.getUniversalQueueFamilyIndex())
181 , m_bufferSize (bufferSize)
182 , m_workGroupSize (workGroupSize)
183 , m_dispatchCommands (dispatchCommands)
184 , m_allocator (context.getDefaultAllocator())
188 void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
190 DE_UNREF(commandBuffer);
192 const vk::Allocation& alloc = indirectBuffer.getAllocation();
193 deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
195 for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
197 DE_ASSERT(cmdIter->m_offset >= 0);
198 DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
199 DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);
201 deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];
203 dstPtr[0] = cmdIter->m_numWorkGroups[0];
204 dstPtr[1] = cmdIter->m_numWorkGroups[1];
205 dstPtr[2] = cmdIter->m_numWorkGroups[2];
208 vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), m_bufferSize);
211 tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
213 tcu::TestContext& testCtx = m_context.getTestContext();
215 testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << tcu::TestLog::EndMessage;
217 tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");
219 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
222 << tcu::TestLog::Message
223 << cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
224 << tcu::TestLog::EndMessage;
228 // Create result buffer
229 const vk::VkDeviceSize resultBlockSize = getResultBlockAlignedSize(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
230 const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size();
233 m_device_interface, m_device, m_allocator,
234 makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
235 vk::MemoryRequirement::HostVisible);
238 const vk::Allocation& alloc = resultBuffer.getAllocation();
239 deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
241 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
243 deUint8* const dstPtr = &resultDataPtr[resultBlockSize*cmdNdx];
245 *(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
246 *(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
247 *(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
248 *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
251 vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
254 // Create verify compute shader
255 const vk::Unique<vk::VkShaderModule> verifyShader(createShaderModule(
256 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"), 0u));
258 // Create descriptorSetLayout
259 vk::DescriptorSetLayoutBuilder layoutBuilder;
260 layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
261 vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
263 // Create compute pipeline
264 const vk::Unique<vk::VkPipelineLayout> pipelineLayout(makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout));
265 const vk::Unique<vk::VkPipeline> computePipeline(makeComputePipeline(m_device_interface, m_device, *pipelineLayout, *verifyShader));
267 // Create descriptor pool
268 const vk::Unique<vk::VkDescriptorPool> descriptorPool(
269 vk::DescriptorPoolBuilder()
270 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size())
271 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));
273 const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
274 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);
276 // Create command buffer
277 const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(m_device_interface, m_device, m_queueFamilyIndex));
278 const vk::Unique<vk::VkCommandBuffer> cmdBuffer(makeCommandBuffer(m_device_interface, m_device, *cmdPool));
280 // Begin recording commands
281 beginCommandBuffer(m_device_interface, *cmdBuffer);
283 // Create indirect buffer
284 Buffer indirectBuffer(
285 m_device_interface, m_device, m_allocator,
286 makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
287 vk::MemoryRequirement::HostVisible);
288 fillIndirectBufferData(*cmdBuffer, indirectBuffer);
290 // Bind compute pipeline
291 m_device_interface.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
293 // Allocate descriptor sets
294 DynArray< vk::Move<vk::VkDescriptorSet> > descriptorSets(m_dispatchCommands.size());
296 vk::VkDeviceSize curOffset = 0;
298 // Create descriptor sets
299 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
301 descriptorSets[cmdNdx] = makeDescriptorSet(m_device_interface, m_device, *descriptorPool, *descriptorSetLayout);
303 const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);
305 vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
306 descriptorSetBuilder.writeSingle(*descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
307 descriptorSetBuilder.update(m_device_interface, m_device);
309 // Bind descriptor set
310 m_device_interface.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSets[cmdNdx].get(), 0u, DE_NULL);
312 // Dispatch indirect compute command
313 m_device_interface.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);
315 curOffset += resultBlockSize;
318 // Insert memory barrier
319 m_device_interface.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
320 0, (const vk::VkMemoryBarrier*)DE_NULL,
322 0, (const vk::VkImageMemoryBarrier*)DE_NULL);
324 // End recording commands
325 endCommandBuffer(m_device_interface, *cmdBuffer);
327 // Wait for command buffer execution finish
328 submitCommandsAndWait(m_device_interface, m_device, m_queue, *cmdBuffer);
330 // Check if result buffer contains valid values
331 if (verifyResultBuffer(resultBuffer, resultBlockSize, resultBufferSize))
332 return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
334 return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
337 deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const Buffer& resultBuffer,
338 const vk::VkDeviceSize resultBlockSize,
339 const vk::VkDeviceSize resultBufferSize) const
342 const vk::Allocation& alloc = resultBuffer.getAllocation();
343 vk::invalidateMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
345 const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
347 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
349 const DispatchCommand& cmd = m_dispatchCommands[cmdNdx];
350 const deUint8* const srcPtr = (const deUint8*)resultDataPtr + cmdNdx*resultBlockSize;
351 const deUint32 numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
352 const deUint32 numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
353 const deUint32 numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
354 const deUint32 expectedCount = numInvocationsPerGroup * numGroups;
356 if (numPassed != expectedCount)
358 tcu::TestContext& testCtx = m_context.getTestContext();
361 << tcu::TestLog::Message
362 << "ERROR: got invalid result for invocation " << cmdNdx
363 << ": got numPassed = " << numPassed << ", expected " << expectedCount
364 << tcu::TestLog::EndMessage;
373 class IndirectDispatchCaseBufferUpload : public vkt::TestCase
376 IndirectDispatchCaseBufferUpload (tcu::TestContext& testCtx,
377 const DispatchCaseDesc& caseDesc,
378 const glu::GLSLVersion glslVersion);
380 virtual ~IndirectDispatchCaseBufferUpload (void) {}
382 virtual void initPrograms (vk::SourceCollections& programCollection) const;
383 virtual TestInstance* createInstance (Context& context) const;
386 const deUintptr m_bufferSize;
387 const tcu::UVec3 m_workGroupSize;
388 const DispatchCommandsVec m_dispatchCommands;
389 const glu::GLSLVersion m_glslVersion;
392 IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
393 IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
396 IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext& testCtx,
397 const DispatchCaseDesc& caseDesc,
398 const glu::GLSLVersion glslVersion)
399 : vkt::TestCase (testCtx, caseDesc.m_name, caseDesc.m_description)
400 , m_bufferSize (caseDesc.m_bufferSize)
401 , m_workGroupSize (caseDesc.m_workGroupSize)
402 , m_dispatchCommands (caseDesc.m_dispatchCommands)
403 , m_glslVersion (glslVersion)
407 void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
409 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
411 std::ostringstream verifyBuffer;
414 << versionDecl << "\n"
415 << "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
416 << "layout(set = 0, binding = 0, std430) buffer Result\n"
418 << " uvec3 expectedGroupCount;\n"
419 << " coherent uint numPassed;\n"
421 << "void main (void)\n"
423 << " if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
424 << " atomicAdd(result.numPassed, 1u);\n"
427 std::map<std::string, std::string> args;
429 args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
430 args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
431 args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());
433 std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);
435 programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
438 TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
440 return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
443 class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
446 IndirectDispatchInstanceBufferGenerate (Context& context,
447 const std::string& name,
448 const deUintptr bufferSize,
449 const tcu::UVec3& workGroupSize,
450 const DispatchCommandsVec& dispatchCommands)
451 : IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands) {}
453 virtual ~IndirectDispatchInstanceBufferGenerate (void) {}
456 virtual void fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer,
457 const Buffer& indirectBuffer);
459 vk::Move<vk::VkDescriptorPool> m_descriptorPool;
460 vk::Move<vk::VkDescriptorSet> m_descriptorSet;
461 vk::Move<vk::VkPipelineLayout> m_pipelineLayout;
462 vk::Move<vk::VkPipeline> m_computePipeline;
465 IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
466 IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
469 void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
471 // Create compute shader that generates data for indirect buffer
472 const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
473 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));
475 // Create descriptorSetLayout
476 vk::DescriptorSetLayoutBuilder layoutBuilder;
477 layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
478 vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
480 // Create compute pipeline
481 m_pipelineLayout = makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout);
482 m_computePipeline = makeComputePipeline(m_device_interface, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);
484 // Create descriptor pool
485 m_descriptorPool = vk::DescriptorPoolBuilder()
486 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
487 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
489 // Create descriptor set
490 m_descriptorSet = makeDescriptorSet(m_device_interface, m_device, *m_descriptorPool, *descriptorSetLayout);
492 const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);
494 vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
495 descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
496 descriptorSetBuilder.update(m_device_interface, m_device);
498 const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
499 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);
501 // Bind compute pipeline
502 m_device_interface.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);
504 // Bind descriptor set
505 m_device_interface.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
507 // Dispatch compute command
508 m_device_interface.cmdDispatch(commandBuffer, 1u, 1u, 1u);
510 // Insert memory barrier
511 m_device_interface.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0,
512 0, (const vk::VkMemoryBarrier*)DE_NULL,
514 0, (const vk::VkImageMemoryBarrier*)DE_NULL);
517 class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
520 IndirectDispatchCaseBufferGenerate (tcu::TestContext& testCtx,
521 const DispatchCaseDesc& caseDesc,
522 const glu::GLSLVersion glslVersion)
523 : IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion) {}
525 virtual ~IndirectDispatchCaseBufferGenerate (void) {}
527 virtual void initPrograms (vk::SourceCollections& programCollection) const;
528 virtual TestInstance* createInstance (Context& context) const;
531 IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
532 IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
535 void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
537 IndirectDispatchCaseBufferUpload::initPrograms(programCollection);
539 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
541 std::ostringstream computeBuffer;
545 << versionDecl << "\n"
546 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
547 << "layout(set = 0, binding = 0, std430) buffer Out\n"
549 << " highp uint data[];\n"
551 << "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
553 << " data[offset+0u] = numWorkGroups.x;\n"
554 << " data[offset+1u] = numWorkGroups.y;\n"
555 << " data[offset+2u] = numWorkGroups.z;\n"
557 << "void main (void)\n"
561 for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
563 const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
564 DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);
567 << "\twriteCmd(" << offs << "u, uvec3("
568 << cmdIter->m_numWorkGroups.x() << "u, "
569 << cmdIter->m_numWorkGroups.y() << "u, "
570 << cmdIter->m_numWorkGroups.z() << "u));\n";
574 computeBuffer << "}\n";
576 std::string computeString = computeBuffer.str();
578 programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
581 TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
583 return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
586 DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
588 DispatchCommandsVec vec;
593 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
594 const DispatchCommand& cmd1,
595 const DispatchCommand& cmd2,
596 const DispatchCommand& cmd3,
597 const DispatchCommand& cmd4)
599 DispatchCommandsVec vec;
608 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
609 const DispatchCommand& cmd1,
610 const DispatchCommand& cmd2,
611 const DispatchCommand& cmd3,
612 const DispatchCommand& cmd4,
613 const DispatchCommand& cmd5,
614 const DispatchCommand& cmd6)
616 DispatchCommandsVec vec;
629 tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx)
631 static const DispatchCaseDesc s_dispatchCases[] =
633 DispatchCaseDesc("single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
634 commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)))
636 DispatchCaseDesc("multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
637 commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5)))
639 DispatchCaseDesc("multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
640 commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3)))
642 DispatchCaseDesc("small_offset", "Small offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
643 commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1)))
645 DispatchCaseDesc("large_offset", "Large offset", (2 << 20), tcu::UVec3(1, 1, 1),
646 commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1)))
648 DispatchCaseDesc("large_offset_multiple_invocations", "Large offset, multiple invocations", (2 << 20), tcu::UVec3(2, 3, 1),
649 commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3)))
651 DispatchCaseDesc("empty_command", "Empty command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
652 commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0)))
654 DispatchCaseDesc("multi_dispatch", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
655 commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
656 DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
657 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
658 DispatchCommand(40, tcu::UVec3(1, 1, 7)),
659 DispatchCommand(52, tcu::UVec3(1, 1, 4)))
661 DispatchCaseDesc("multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
662 commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
663 DispatchCommand(0, tcu::UVec3(1, 1, 1)),
664 DispatchCommand(0, tcu::UVec3(1, 1, 1)),
665 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
666 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
667 DispatchCommand(52, tcu::UVec3(1, 1, 4)),
668 DispatchCommand(52, tcu::UVec3(1, 1, 4)))
672 de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch", "Indirect dispatch tests"));
674 tcu::TestCaseGroup* const groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer", "");
675 indirectComputeDispatchTests->addChild(groupBufferUpload);
677 for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
679 groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
682 tcu::TestCaseGroup* const groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute", "");
683 indirectComputeDispatchTests->addChild(groupBufferGenerate);
685 for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
687 groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
690 return indirectComputeDispatchTests.release();