dEQP-VK.renderpass: Set IMAGE_USAGE_TRANSFER_SRC_BIT when needed
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / compute / vktComputeIndirectComputeDispatchTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 Mobica Ltd.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and/or associated documentation files (the
9  * "Materials"), to deal in the Materials without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Materials, and to
12  * permit persons to whom the Materials are furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice(s) and this permission notice shall be included
16  * in all copies or substantial portions of the Materials.
17  *
18  * The Materials are Confidential Information as defined by the
19  * Khronos Membership Agreement until designated non-confidential by Khronos,
20  * at which point this condition clause shall be removed.
21  *
22  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
26  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
27  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
28  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
29  *
30  *//*!
31  * \file
32  * \brief Indirect Compute Dispatch tests
33  *//*--------------------------------------------------------------------*/
34
35 #include "vktComputeIndirectComputeDispatchTests.hpp"
36 #include "vktComputeTestsUtil.hpp"
37
38 #include <string>
39 #include <map>
40 #include <vector>
41
42 #include "vkDefs.hpp"
43 #include "vkRef.hpp"
44 #include "vkRefUtil.hpp"
45 #include "vktTestCase.hpp"
46 #include "vktTestCaseUtil.hpp"
47 #include "vkPlatform.hpp"
48 #include "vkPrograms.hpp"
49 #include "vkMemUtil.hpp"
50 #include "vkBuilderUtil.hpp"
51 #include "vkQueryUtil.hpp"
52
53 #include "tcuVector.hpp"
54 #include "tcuVectorUtil.hpp"
55 #include "tcuTestLog.hpp"
56 #include "tcuRGBA.hpp"
57 #include "tcuStringTemplate.hpp"
58
59 #include "deUniquePtr.hpp"
60 #include "deSharedPtr.hpp"
61 #include "deStringUtil.hpp"
62 #include "deArrayUtil.hpp"
63
64 #include "gluShaderUtil.hpp"
65
66 namespace vkt
67 {
68 namespace compute
69 {
70 namespace
71 {
72
73 enum
74 {
75         RESULT_BLOCK_BASE_SIZE                  = 4 * (int)sizeof(deUint32), // uvec3 + uint
76         RESULT_BLOCK_NUM_PASSED_OFFSET  = 3 * (int)sizeof(deUint32),
77         INDIRECT_COMMAND_OFFSET                 = 3 * (int)sizeof(deUint32),
78 };
79
80 vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface&        instance_interface,
81                                                                                         const vk::VkPhysicalDevice              physicalDevice,
82                                                                                         const vk::VkDeviceSize                  baseSize)
83 {
84         // TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
85         vk::VkPhysicalDeviceProperties deviceProperties;
86         instance_interface.getPhysicalDeviceProperties(physicalDevice, &deviceProperties);
87         vk::VkDeviceSize alignment = deviceProperties.limits.minStorageBufferOffsetAlignment;
88
89         if (alignment == 0 || (baseSize % alignment == 0))
90                 return baseSize;
91         else
92                 return (baseSize / alignment + 1)*alignment;
93 }
94
95 struct DispatchCommand
96 {
97                                 DispatchCommand (const deIntptr         offset,
98                                                                  const tcu::UVec3&      numWorkGroups)
99                                         : m_offset                      (offset)
100                                         , m_numWorkGroups       (numWorkGroups) {}
101
102         deIntptr        m_offset;
103         tcu::UVec3      m_numWorkGroups;
104 };
105
106 typedef std::vector<DispatchCommand> DispatchCommandsVec;
107
108 struct DispatchCaseDesc
109 {
110                                                                 DispatchCaseDesc (const char*                                   name,
111                                                                                                   const char*                                   description,
112                                                                                                   const deUintptr                               bufferSize,
113                                                                                                   const tcu::UVec3                              workGroupSize,
114                                                                                                   const DispatchCommandsVec&    dispatchCommands)
115                                                                         : m_name                                (name)
116                                                                         , m_description                 (description)
117                                                                         , m_bufferSize                  (bufferSize)
118                                                                         , m_workGroupSize               (workGroupSize)
119                                                                         , m_dispatchCommands    (dispatchCommands) {}
120
121         const char*                                     m_name;
122         const char*                                     m_description;
123         const deUintptr                         m_bufferSize;
124         const tcu::UVec3                        m_workGroupSize;
125         const DispatchCommandsVec       m_dispatchCommands;
126 };
127
128 class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
129 {
130 public:
131                                                                         IndirectDispatchInstanceBufferUpload    (Context&                                       context,
132                                                                                                                                                          const std::string&                     name,
133                                                                                                                                                          const deUintptr                        bufferSize,
134                                                                                                                                                          const tcu::UVec3&                      workGroupSize,
135                                                                                                                                                          const DispatchCommandsVec& dispatchCommands);
136
137         virtual                                                 ~IndirectDispatchInstanceBufferUpload   (void) {}
138
139         virtual tcu::TestStatus                 iterate                                                                 (void);
140
141 protected:
142         virtual void                                    fillIndirectBufferData                                  (const vk::VkCommandBuffer      commandBuffer,
143                                                                                                                                                          const Buffer&                          indirectBuffer);
144
145         deBool                                                  verifyResultBuffer                                              (const Buffer&                          resultBuffer,
146                                                                                                                                                          const vk::VkDeviceSize         resultBlockSize,
147                                                                                                                                                          const vk::VkDeviceSize         resultBufferSize) const;
148
149         Context&                                                m_context;
150         const std::string                               m_name;
151
152         const vk::DeviceInterface&              m_device_interface;
153         const vk::VkDevice                              m_device;
154
155         const vk::VkQueue                               m_queue;
156         const deUint32                                  m_queueFamilyIndex;
157
158         const deUintptr                                 m_bufferSize;
159         const tcu::UVec3                                m_workGroupSize;
160         const DispatchCommandsVec               m_dispatchCommands;
161
162         vk::Allocator&                                  m_allocator;
163
164 private:
165         IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
166         IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
167 };
168
169 IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context&                                    context,
170                                                                                                                                                         const std::string&                      name,
171                                                                                                                                                         const deUintptr                         bufferSize,
172                                                                                                                                                         const tcu::UVec3&                       workGroupSize,
173                                                                                                                                                         const DispatchCommandsVec&      dispatchCommands)
174         : vkt::TestInstance             (context)
175         , m_context                             (context)
176         , m_name                                (name)
177         , m_device_interface    (context.getDeviceInterface())
178         , m_device                              (context.getDevice())
179         , m_queue                               (context.getUniversalQueue())
180         , m_queueFamilyIndex    (context.getUniversalQueueFamilyIndex())
181         , m_bufferSize                  (bufferSize)
182         , m_workGroupSize               (workGroupSize)
183         , m_dispatchCommands    (dispatchCommands)
184         , m_allocator                   (context.getDefaultAllocator())
185 {
186 }
187
188 void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
189 {
190         DE_UNREF(commandBuffer);
191
192         const vk::Allocation& alloc = indirectBuffer.getAllocation();
193         deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
194
195         for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
196         {
197                 DE_ASSERT(cmdIter->m_offset >= 0);
198                 DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
199                 DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);
200
201                 deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];
202
203                 dstPtr[0] = cmdIter->m_numWorkGroups[0];
204                 dstPtr[1] = cmdIter->m_numWorkGroups[1];
205                 dstPtr[2] = cmdIter->m_numWorkGroups[2];
206         }
207
208         vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), m_bufferSize);
209 }
210
211 tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
212 {
213         tcu::TestContext& testCtx = m_context.getTestContext();
214
215         testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << tcu::TestLog::EndMessage;
216         {
217                 tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");
218
219                 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
220                 {
221                         testCtx.getLog()
222                                 << tcu::TestLog::Message
223                                 << cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
224                                 << tcu::TestLog::EndMessage;
225                 }
226         }
227
228         // Create result buffer
229         const vk::VkDeviceSize resultBlockSize = getResultBlockAlignedSize(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
230         const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size();
231
232         Buffer resultBuffer(
233                 m_device_interface, m_device, m_allocator,
234                 makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
235                 vk::MemoryRequirement::HostVisible);
236
237         {
238                 const vk::Allocation& alloc = resultBuffer.getAllocation();
239                 deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
240
241                 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
242                 {
243                         deUint8* const  dstPtr = &resultDataPtr[resultBlockSize*cmdNdx];
244
245                         *(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
246                         *(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
247                         *(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
248                         *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
249                 }
250
251                 vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
252         }
253
254         // Create verify compute shader
255         const vk::Unique<vk::VkShaderModule> verifyShader(createShaderModule(
256                 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"), 0u));
257
258         // Create descriptorSetLayout
259         vk::DescriptorSetLayoutBuilder layoutBuilder;
260         layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
261         vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
262
263         // Create compute pipeline
264         const vk::Unique<vk::VkPipelineLayout> pipelineLayout(makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout));
265         const vk::Unique<vk::VkPipeline> computePipeline(makeComputePipeline(m_device_interface, m_device, *pipelineLayout, *verifyShader));
266
267         // Create descriptor pool
268         const vk::Unique<vk::VkDescriptorPool> descriptorPool(
269                 vk::DescriptorPoolBuilder()
270                 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
271                 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));
272
273         const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
274                 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);
275         const void* const postBarriers[] = { &ssboPostBarrier };
276
277         // Create command buffer
278         const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(m_device_interface, m_device, m_queueFamilyIndex));
279         const vk::Unique<vk::VkCommandBuffer> cmdBuffer(makeCommandBuffer(m_device_interface, m_device, *cmdPool));
280
281         // Begin recording commands
282         beginCommandBuffer(m_device_interface, *cmdBuffer);
283
284         // Create indirect buffer
285         Buffer indirectBuffer(
286                 m_device_interface, m_device, m_allocator,
287                 makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
288                 vk::MemoryRequirement::HostVisible);
289         fillIndirectBufferData(*cmdBuffer, indirectBuffer);
290
291         // Bind compute pipeline
292         m_device_interface.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
293
294         // Allocate descriptor sets
295         DynArray< vk::Move<vk::VkDescriptorSet> > descriptorSets(m_dispatchCommands.size());
296
297         vk::VkDeviceSize curOffset = 0;
298
299         // Create descriptor sets
300         for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
301         {
302                 descriptorSets[cmdNdx] = makeDescriptorSet(m_device_interface, m_device, *descriptorPool, *descriptorSetLayout);
303
304                 const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);
305
306                 vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
307                 descriptorSetBuilder.writeSingle(*descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
308                 descriptorSetBuilder.update(m_device_interface, m_device);
309
310                 // Bind descriptor set
311                 m_device_interface.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSets[cmdNdx].get(), 0u, DE_NULL);
312
313                 // Dispatch indirect compute command
314                 m_device_interface.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);
315
316                 curOffset += resultBlockSize;
317         }
318
319         // Insert memory barrier
320         m_device_interface.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_FALSE, DE_LENGTH_OF_ARRAY(postBarriers), postBarriers);
321
322         // End recording commands
323         endCommandBuffer(m_device_interface, *cmdBuffer);
324
325         // Wait for command buffer execution finish
326         submitCommandsAndWait(m_device_interface, m_device, m_queue, *cmdBuffer);
327
328         // Check if result buffer contains valid values
329         if (verifyResultBuffer(resultBuffer, resultBlockSize, resultBufferSize))
330                 return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
331         else
332                 return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
333 }
334
335 deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const Buffer&                  resultBuffer,
336                                                                                                                                  const vk::VkDeviceSize resultBlockSize,
337                                                                                                                                  const vk::VkDeviceSize resultBufferSize) const
338 {
339         deBool allOk = true;
340         const vk::Allocation& alloc = resultBuffer.getAllocation();
341         vk::invalidateMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
342
343         const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
344
345         for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
346         {
347                 const DispatchCommand&  cmd = m_dispatchCommands[cmdNdx];
348                 const deUint8* const    srcPtr = (const deUint8*)resultDataPtr + cmdNdx*resultBlockSize;
349                 const deUint32                  numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
350                 const deUint32                  numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
351                 const deUint32                  numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
352                 const deUint32                  expectedCount = numInvocationsPerGroup * numGroups;
353
354                 if (numPassed != expectedCount)
355                 {
356                         tcu::TestContext& testCtx = m_context.getTestContext();
357
358                         testCtx.getLog()
359                                 << tcu::TestLog::Message
360                                 << "ERROR: got invalid result for invocation " << cmdNdx
361                                 << ": got numPassed = " << numPassed << ", expected " << expectedCount
362                                 << tcu::TestLog::EndMessage;
363
364                         allOk = false;
365                 }
366         }
367
368         return allOk;
369 }
370
371 class IndirectDispatchCaseBufferUpload : public vkt::TestCase
372 {
373 public:
374                                                                 IndirectDispatchCaseBufferUpload        (tcu::TestContext&                      testCtx,
375                                                                                                                                          const DispatchCaseDesc&        caseDesc,
376                                                                                                                                          const glu::GLSLVersion         glslVersion);
377
378         virtual                                         ~IndirectDispatchCaseBufferUpload       (void) {}
379
380         virtual void                            initPrograms                                            (vk::SourceCollections&         programCollection) const;
381         virtual TestInstance*           createInstance                                          (Context&                                       context) const;
382
383 protected:
384         const deUintptr                         m_bufferSize;
385         const tcu::UVec3                        m_workGroupSize;
386         const DispatchCommandsVec       m_dispatchCommands;
387         const glu::GLSLVersion          m_glslVersion;
388
389 private:
390         IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
391         IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
392 };
393
394 IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext&           testCtx,
395                                                                                                                                         const DispatchCaseDesc& caseDesc,
396                                                                                                                                         const glu::GLSLVersion  glslVersion)
397         : vkt::TestCase                 (testCtx, caseDesc.m_name, caseDesc.m_description)
398         , m_bufferSize                  (caseDesc.m_bufferSize)
399         , m_workGroupSize               (caseDesc.m_workGroupSize)
400         , m_dispatchCommands    (caseDesc.m_dispatchCommands)
401         , m_glslVersion                 (glslVersion)
402 {
403 }
404
405 void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
406 {
407         const char* const       versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
408
409         std::ostringstream      verifyBuffer;
410
411         verifyBuffer
412                 << versionDecl << "\n"
413                 << "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
414                 << "layout(set = 0, binding = 0, std430) buffer Result\n"
415                 << "{\n"
416                 << "    uvec3           expectedGroupCount;\n"
417                 << "    coherent uint   numPassed;\n"
418                 << "} result;\n"
419                 << "void main (void)\n"
420                 << "{\n"
421                 << "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
422                 << "        atomicAdd(result.numPassed, 1u);\n"
423                 << "}\n";
424
425         std::map<std::string, std::string> args;
426
427         args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
428         args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
429         args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());
430
431         std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);
432
433         programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
434 }
435
436 TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
437 {
438         return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
439 }
440
441 class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
442 {
443 public:
444                                                                         IndirectDispatchInstanceBufferGenerate  (Context&                                       context,
445                                                                                                                                                          const std::string&                     name,
446                                                                                                                                                          const deUintptr                        bufferSize,
447                                                                                                                                                          const tcu::UVec3&                      workGroupSize,
448                                                                                                                                                          const DispatchCommandsVec&     dispatchCommands)
449                                                                                 : IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands) {}
450
451         virtual                                                 ~IndirectDispatchInstanceBufferGenerate (void) {}
452
453 protected:
454         virtual void                                    fillIndirectBufferData                                  (const vk::VkCommandBuffer      commandBuffer,
455                                                                                                                                                          const Buffer&                          indirectBuffer);
456
457         vk::Move<vk::VkDescriptorPool>  m_descriptorPool;
458         vk::Move<vk::VkDescriptorSet>   m_descriptorSet;
459         vk::Move<vk::VkPipelineLayout>  m_pipelineLayout;
460         vk::Move<vk::VkPipeline>                m_computePipeline;
461
462 private:
463         IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
464         IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
465 };
466
467 void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
468 {
469         // Create compute shader that generates data for indirect buffer
470         const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
471                 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));
472
473         // Create descriptorSetLayout
474         vk::DescriptorSetLayoutBuilder layoutBuilder;
475         layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
476         vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
477
478         // Create compute pipeline
479         m_pipelineLayout = makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout);
480         m_computePipeline = makeComputePipeline(m_device_interface, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);
481
482         // Create descriptor pool
483         m_descriptorPool = vk::DescriptorPoolBuilder()
484                 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
485                 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
486
487         // Create descriptor set
488         m_descriptorSet = makeDescriptorSet(m_device_interface, m_device, *m_descriptorPool, *descriptorSetLayout);
489
490         const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);
491
492         vk::DescriptorSetUpdateBuilder  descriptorSetBuilder;
493         descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
494         descriptorSetBuilder.update(m_device_interface, m_device);
495
496         const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
497                 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);
498
499         const void* const postBarriers[] = { &bufferBarrier };
500
501         // Bind compute pipeline
502         m_device_interface.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);
503
504         // Bind descriptor set
505         m_device_interface.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
506
507         // Dispatch compute command
508         m_device_interface.cmdDispatch(commandBuffer, 1u, 1u, 1u);
509
510         // Insert memory barrier
511         m_device_interface.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, vk::VK_FALSE, DE_LENGTH_OF_ARRAY(postBarriers), postBarriers);
512 }
513
514 class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
515 {
516 public:
517                                                         IndirectDispatchCaseBufferGenerate      (tcu::TestContext&                      testCtx,
518                                                                                                                                  const DispatchCaseDesc&        caseDesc,
519                                                                                                                                  const glu::GLSLVersion         glslVersion)
520                                                                 : IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion) {}
521
522         virtual                                 ~IndirectDispatchCaseBufferGenerate     (void) {}
523
524         virtual void                    initPrograms                                            (vk::SourceCollections&         programCollection) const;
525         virtual TestInstance*   createInstance                                          (Context&                                       context) const;
526
527 private:
528         IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
529         IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
530 };
531
532 void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
533 {
534         IndirectDispatchCaseBufferUpload::initPrograms(programCollection);
535
536         const char* const       versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
537
538         std::ostringstream computeBuffer;
539
540         // Header
541         computeBuffer
542                 << versionDecl << "\n"
543                 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
544                 << "layout(set = 0, binding = 0, std430) buffer Out\n"
545                 << "{\n"
546                 << "    highp uint data[];\n"
547                 << "};\n"
548                 << "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
549                 << "{\n"
550                 << "    data[offset+0u] = numWorkGroups.x;\n"
551                 << "    data[offset+1u] = numWorkGroups.y;\n"
552                 << "    data[offset+2u] = numWorkGroups.z;\n"
553                 << "}\n"
554                 << "void main (void)\n"
555                 << "{\n";
556
557         // Dispatch commands
558         for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
559         {
560                 const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
561                 DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);
562
563                 computeBuffer
564                         << "\twriteCmd(" << offs << "u, uvec3("
565                         << cmdIter->m_numWorkGroups.x() << "u, "
566                         << cmdIter->m_numWorkGroups.y() << "u, "
567                         << cmdIter->m_numWorkGroups.z() << "u));\n";
568         }
569
570         // Ending
571         computeBuffer << "}\n";
572
573         std::string computeString = computeBuffer.str();
574
575         programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
576 }
577
578 TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
579 {
580         return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
581 }
582
583 DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
584 {
585         DispatchCommandsVec vec;
586         vec.push_back(cmd);
587         return vec;
588 }
589
590 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
591                                                                  const DispatchCommand& cmd1,
592                                                                  const DispatchCommand& cmd2,
593                                                                  const DispatchCommand& cmd3,
594                                                                  const DispatchCommand& cmd4)
595 {
596         DispatchCommandsVec vec;
597         vec.push_back(cmd0);
598         vec.push_back(cmd1);
599         vec.push_back(cmd2);
600         vec.push_back(cmd3);
601         vec.push_back(cmd4);
602         return vec;
603 }
604
605 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
606                                                                  const DispatchCommand& cmd1,
607                                                                  const DispatchCommand& cmd2,
608                                                                  const DispatchCommand& cmd3,
609                                                                  const DispatchCommand& cmd4,
610                                                                  const DispatchCommand& cmd5,
611                                                                  const DispatchCommand& cmd6)
612 {
613         DispatchCommandsVec vec;
614         vec.push_back(cmd0);
615         vec.push_back(cmd1);
616         vec.push_back(cmd2);
617         vec.push_back(cmd3);
618         vec.push_back(cmd4);
619         vec.push_back(cmd5);
620         vec.push_back(cmd6);
621         return vec;
622 }
623
624 } // anonymous ns
625
626 tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx)
627 {
628         static const DispatchCaseDesc s_dispatchCases[] =
629         {
630                 DispatchCaseDesc("single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
631                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)))
632         ),
633                 DispatchCaseDesc("multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
634                         commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5)))
635                 ),
636                 DispatchCaseDesc("multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
637                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3)))
638                 ),
639                 DispatchCaseDesc("small_offset", "Small offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
640                         commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1)))
641                 ),
642                 DispatchCaseDesc("large_offset", "Large offset", (2 << 20), tcu::UVec3(1, 1, 1),
643                         commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1)))
644                 ),
645                 DispatchCaseDesc("large_offset_multiple_invocations", "Large offset, multiple invocations", (2 << 20), tcu::UVec3(2, 3, 1),
646                         commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3)))
647                 ),
648                 DispatchCaseDesc("empty_command", "Empty command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
649                         commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0)))
650                 ),
651                 DispatchCaseDesc("multi_dispatch", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
652                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
653                                                 DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
654                                                 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
655                                                 DispatchCommand(40, tcu::UVec3(1, 1, 7)),
656                                                 DispatchCommand(52, tcu::UVec3(1, 1, 4)))
657                 ),
658                 DispatchCaseDesc("multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
659                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
660                                                 DispatchCommand(0, tcu::UVec3(1, 1, 1)),
661                                                 DispatchCommand(0, tcu::UVec3(1, 1, 1)),
662                                                 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
663                                                 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
664                                                 DispatchCommand(52, tcu::UVec3(1, 1, 4)),
665                                                 DispatchCommand(52, tcu::UVec3(1, 1, 4)))
666                 ),
667         };
668
669         de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch", "Indirect dispatch tests"));
670
671         tcu::TestCaseGroup* const       groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer", "");
672         indirectComputeDispatchTests->addChild(groupBufferUpload);
673
674         for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
675         {
676                 groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
677         }
678
679         tcu::TestCaseGroup* const       groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute", "");
680         indirectComputeDispatchTests->addChild(groupBufferGenerate);
681
682         for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
683         {
684                 groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
685         }
686
687         return indirectComputeDispatchTests.release();
688 }
689
690 } // compute
691 } // vkt