Merge branch 'jekstrand_renderpass_transfer_bit_fix' into 'master'
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / compute / vktComputeIndirectComputeDispatchTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 Mobica Ltd.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and/or associated documentation files (the
9  * "Materials"), to deal in the Materials without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Materials, and to
12  * permit persons to whom the Materials are furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice(s) and this permission notice shall be included
16  * in all copies or substantial portions of the Materials.
17  *
18  * The Materials are Confidential Information as defined by the
19  * Khronos Membership Agreement until designated non-confidential by Khronos,
20  * at which point this condition clause shall be removed.
21  *
22  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
26  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
27  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
28  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
29  *
30  *//*!
31  * \file
32  * \brief Indirect Compute Dispatch tests
33  *//*--------------------------------------------------------------------*/
34
35 #include "vktComputeIndirectComputeDispatchTests.hpp"
36 #include "vktComputeTestsUtil.hpp"
37
38 #include <string>
39 #include <map>
40 #include <vector>
41
42 #include "vkDefs.hpp"
43 #include "vkRef.hpp"
44 #include "vkRefUtil.hpp"
45 #include "vktTestCase.hpp"
46 #include "vktTestCaseUtil.hpp"
47 #include "vkPlatform.hpp"
48 #include "vkPrograms.hpp"
49 #include "vkMemUtil.hpp"
50 #include "vkBuilderUtil.hpp"
51 #include "vkQueryUtil.hpp"
52
53 #include "tcuVector.hpp"
54 #include "tcuVectorUtil.hpp"
55 #include "tcuTestLog.hpp"
56 #include "tcuRGBA.hpp"
57 #include "tcuStringTemplate.hpp"
58
59 #include "deUniquePtr.hpp"
60 #include "deSharedPtr.hpp"
61 #include "deStringUtil.hpp"
62 #include "deArrayUtil.hpp"
63
64 #include "gluShaderUtil.hpp"
65
66 namespace vkt
67 {
68 namespace compute
69 {
70 namespace
71 {
72
73 enum
74 {
75         RESULT_BLOCK_BASE_SIZE                  = 4 * (int)sizeof(deUint32), // uvec3 + uint
76         RESULT_BLOCK_NUM_PASSED_OFFSET  = 3 * (int)sizeof(deUint32),
77         INDIRECT_COMMAND_OFFSET                 = 3 * (int)sizeof(deUint32),
78 };
79
80 vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface&        instance_interface,
81                                                                                         const vk::VkPhysicalDevice              physicalDevice,
82                                                                                         const vk::VkDeviceSize                  baseSize)
83 {
84         // TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
85         vk::VkPhysicalDeviceProperties deviceProperties;
86         instance_interface.getPhysicalDeviceProperties(physicalDevice, &deviceProperties);
87         vk::VkDeviceSize alignment = deviceProperties.limits.minStorageBufferOffsetAlignment;
88
89         if (alignment == 0 || (baseSize % alignment == 0))
90                 return baseSize;
91         else
92                 return (baseSize / alignment + 1)*alignment;
93 }
94
95 struct DispatchCommand
96 {
97                                 DispatchCommand (const deIntptr         offset,
98                                                                  const tcu::UVec3&      numWorkGroups)
99                                         : m_offset                      (offset)
100                                         , m_numWorkGroups       (numWorkGroups) {}
101
102         deIntptr        m_offset;
103         tcu::UVec3      m_numWorkGroups;
104 };
105
106 typedef std::vector<DispatchCommand> DispatchCommandsVec;
107
108 struct DispatchCaseDesc
109 {
110                                                                 DispatchCaseDesc (const char*                                   name,
111                                                                                                   const char*                                   description,
112                                                                                                   const deUintptr                               bufferSize,
113                                                                                                   const tcu::UVec3                              workGroupSize,
114                                                                                                   const DispatchCommandsVec&    dispatchCommands)
115                                                                         : m_name                                (name)
116                                                                         , m_description                 (description)
117                                                                         , m_bufferSize                  (bufferSize)
118                                                                         , m_workGroupSize               (workGroupSize)
119                                                                         , m_dispatchCommands    (dispatchCommands) {}
120
121         const char*                                     m_name;
122         const char*                                     m_description;
123         const deUintptr                         m_bufferSize;
124         const tcu::UVec3                        m_workGroupSize;
125         const DispatchCommandsVec       m_dispatchCommands;
126 };
127
128 class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
129 {
130 public:
131                                                                         IndirectDispatchInstanceBufferUpload    (Context&                                       context,
132                                                                                                                                                          const std::string&                     name,
133                                                                                                                                                          const deUintptr                        bufferSize,
134                                                                                                                                                          const tcu::UVec3&                      workGroupSize,
135                                                                                                                                                          const DispatchCommandsVec& dispatchCommands);
136
137         virtual                                                 ~IndirectDispatchInstanceBufferUpload   (void) {}
138
139         virtual tcu::TestStatus                 iterate                                                                 (void);
140
141 protected:
142         virtual void                                    fillIndirectBufferData                                  (const vk::VkCommandBuffer      commandBuffer,
143                                                                                                                                                          const Buffer&                          indirectBuffer);
144
145         deBool                                                  verifyResultBuffer                                              (const Buffer&                          resultBuffer,
146                                                                                                                                                          const vk::VkDeviceSize         resultBlockSize,
147                                                                                                                                                          const vk::VkDeviceSize         resultBufferSize) const;
148
149         Context&                                                m_context;
150         const std::string                               m_name;
151
152         const vk::DeviceInterface&              m_device_interface;
153         const vk::VkDevice                              m_device;
154
155         const vk::VkQueue                               m_queue;
156         const deUint32                                  m_queueFamilyIndex;
157
158         const deUintptr                                 m_bufferSize;
159         const tcu::UVec3                                m_workGroupSize;
160         const DispatchCommandsVec               m_dispatchCommands;
161
162         vk::Allocator&                                  m_allocator;
163
164 private:
165         IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
166         IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
167 };
168
169 IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context&                                    context,
170                                                                                                                                                         const std::string&                      name,
171                                                                                                                                                         const deUintptr                         bufferSize,
172                                                                                                                                                         const tcu::UVec3&                       workGroupSize,
173                                                                                                                                                         const DispatchCommandsVec&      dispatchCommands)
174         : vkt::TestInstance             (context)
175         , m_context                             (context)
176         , m_name                                (name)
177         , m_device_interface    (context.getDeviceInterface())
178         , m_device                              (context.getDevice())
179         , m_queue                               (context.getUniversalQueue())
180         , m_queueFamilyIndex    (context.getUniversalQueueFamilyIndex())
181         , m_bufferSize                  (bufferSize)
182         , m_workGroupSize               (workGroupSize)
183         , m_dispatchCommands    (dispatchCommands)
184         , m_allocator                   (context.getDefaultAllocator())
185 {
186 }
187
188 void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
189 {
190         DE_UNREF(commandBuffer);
191
192         const vk::Allocation& alloc = indirectBuffer.getAllocation();
193         deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
194
195         for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
196         {
197                 DE_ASSERT(cmdIter->m_offset >= 0);
198                 DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
199                 DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);
200
201                 deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];
202
203                 dstPtr[0] = cmdIter->m_numWorkGroups[0];
204                 dstPtr[1] = cmdIter->m_numWorkGroups[1];
205                 dstPtr[2] = cmdIter->m_numWorkGroups[2];
206         }
207
208         vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), m_bufferSize);
209 }
210
211 tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
212 {
213         tcu::TestContext& testCtx = m_context.getTestContext();
214
215         testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << tcu::TestLog::EndMessage;
216         {
217                 tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");
218
219                 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
220                 {
221                         testCtx.getLog()
222                                 << tcu::TestLog::Message
223                                 << cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
224                                 << tcu::TestLog::EndMessage;
225                 }
226         }
227
228         // Create result buffer
229         const vk::VkDeviceSize resultBlockSize = getResultBlockAlignedSize(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
230         const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size();
231
232         Buffer resultBuffer(
233                 m_device_interface, m_device, m_allocator,
234                 makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
235                 vk::MemoryRequirement::HostVisible);
236
237         {
238                 const vk::Allocation& alloc = resultBuffer.getAllocation();
239                 deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
240
241                 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
242                 {
243                         deUint8* const  dstPtr = &resultDataPtr[resultBlockSize*cmdNdx];
244
245                         *(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
246                         *(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
247                         *(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
248                         *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
249                 }
250
251                 vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
252         }
253
254         // Create verify compute shader
255         const vk::Unique<vk::VkShaderModule> verifyShader(createShaderModule(
256                 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"), 0u));
257
258         // Create descriptorSetLayout
259         vk::DescriptorSetLayoutBuilder layoutBuilder;
260         layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
261         vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
262
263         // Create compute pipeline
264         const vk::Unique<vk::VkPipelineLayout> pipelineLayout(makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout));
265         const vk::Unique<vk::VkPipeline> computePipeline(makeComputePipeline(m_device_interface, m_device, *pipelineLayout, *verifyShader));
266
267         // Create descriptor pool
268         const vk::Unique<vk::VkDescriptorPool> descriptorPool(
269                 vk::DescriptorPoolBuilder()
270                 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size())
271                 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));
272
273         const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
274                 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);
275
276         // Create command buffer
277         const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(m_device_interface, m_device, m_queueFamilyIndex));
278         const vk::Unique<vk::VkCommandBuffer> cmdBuffer(makeCommandBuffer(m_device_interface, m_device, *cmdPool));
279
280         // Begin recording commands
281         beginCommandBuffer(m_device_interface, *cmdBuffer);
282
283         // Create indirect buffer
284         Buffer indirectBuffer(
285                 m_device_interface, m_device, m_allocator,
286                 makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
287                 vk::MemoryRequirement::HostVisible);
288         fillIndirectBufferData(*cmdBuffer, indirectBuffer);
289
290         // Bind compute pipeline
291         m_device_interface.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
292
293         // Allocate descriptor sets
294         DynArray< vk::Move<vk::VkDescriptorSet> > descriptorSets(m_dispatchCommands.size());
295
296         vk::VkDeviceSize curOffset = 0;
297
298         // Create descriptor sets
299         for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
300         {
301                 descriptorSets[cmdNdx] = makeDescriptorSet(m_device_interface, m_device, *descriptorPool, *descriptorSetLayout);
302
303                 const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);
304
305                 vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
306                 descriptorSetBuilder.writeSingle(*descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
307                 descriptorSetBuilder.update(m_device_interface, m_device);
308
309                 // Bind descriptor set
310                 m_device_interface.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSets[cmdNdx].get(), 0u, DE_NULL);
311
312                 // Dispatch indirect compute command
313                 m_device_interface.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);
314
315                 curOffset += resultBlockSize;
316         }
317
318         // Insert memory barrier
319         m_device_interface.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
320                                                                                   0, (const vk::VkMemoryBarrier*)DE_NULL,
321                                                                                   1, &ssboPostBarrier,
322                                                                                   0, (const vk::VkImageMemoryBarrier*)DE_NULL);
323
324         // End recording commands
325         endCommandBuffer(m_device_interface, *cmdBuffer);
326
327         // Wait for command buffer execution finish
328         submitCommandsAndWait(m_device_interface, m_device, m_queue, *cmdBuffer);
329
330         // Check if result buffer contains valid values
331         if (verifyResultBuffer(resultBuffer, resultBlockSize, resultBufferSize))
332                 return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
333         else
334                 return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
335 }
336
337 deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const Buffer&                  resultBuffer,
338                                                                                                                                  const vk::VkDeviceSize resultBlockSize,
339                                                                                                                                  const vk::VkDeviceSize resultBufferSize) const
340 {
341         deBool allOk = true;
342         const vk::Allocation& alloc = resultBuffer.getAllocation();
343         vk::invalidateMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
344
345         const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
346
347         for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
348         {
349                 const DispatchCommand&  cmd = m_dispatchCommands[cmdNdx];
350                 const deUint8* const    srcPtr = (const deUint8*)resultDataPtr + cmdNdx*resultBlockSize;
351                 const deUint32                  numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
352                 const deUint32                  numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
353                 const deUint32                  numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
354                 const deUint32                  expectedCount = numInvocationsPerGroup * numGroups;
355
356                 if (numPassed != expectedCount)
357                 {
358                         tcu::TestContext& testCtx = m_context.getTestContext();
359
360                         testCtx.getLog()
361                                 << tcu::TestLog::Message
362                                 << "ERROR: got invalid result for invocation " << cmdNdx
363                                 << ": got numPassed = " << numPassed << ", expected " << expectedCount
364                                 << tcu::TestLog::EndMessage;
365
366                         allOk = false;
367                 }
368         }
369
370         return allOk;
371 }
372
373 class IndirectDispatchCaseBufferUpload : public vkt::TestCase
374 {
375 public:
376                                                                 IndirectDispatchCaseBufferUpload        (tcu::TestContext&                      testCtx,
377                                                                                                                                          const DispatchCaseDesc&        caseDesc,
378                                                                                                                                          const glu::GLSLVersion         glslVersion);
379
380         virtual                                         ~IndirectDispatchCaseBufferUpload       (void) {}
381
382         virtual void                            initPrograms                                            (vk::SourceCollections&         programCollection) const;
383         virtual TestInstance*           createInstance                                          (Context&                                       context) const;
384
385 protected:
386         const deUintptr                         m_bufferSize;
387         const tcu::UVec3                        m_workGroupSize;
388         const DispatchCommandsVec       m_dispatchCommands;
389         const glu::GLSLVersion          m_glslVersion;
390
391 private:
392         IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
393         IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
394 };
395
396 IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext&           testCtx,
397                                                                                                                                         const DispatchCaseDesc& caseDesc,
398                                                                                                                                         const glu::GLSLVersion  glslVersion)
399         : vkt::TestCase                 (testCtx, caseDesc.m_name, caseDesc.m_description)
400         , m_bufferSize                  (caseDesc.m_bufferSize)
401         , m_workGroupSize               (caseDesc.m_workGroupSize)
402         , m_dispatchCommands    (caseDesc.m_dispatchCommands)
403         , m_glslVersion                 (glslVersion)
404 {
405 }
406
407 void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
408 {
409         const char* const       versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
410
411         std::ostringstream      verifyBuffer;
412
413         verifyBuffer
414                 << versionDecl << "\n"
415                 << "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
416                 << "layout(set = 0, binding = 0, std430) buffer Result\n"
417                 << "{\n"
418                 << "    uvec3           expectedGroupCount;\n"
419                 << "    coherent uint   numPassed;\n"
420                 << "} result;\n"
421                 << "void main (void)\n"
422                 << "{\n"
423                 << "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
424                 << "        atomicAdd(result.numPassed, 1u);\n"
425                 << "}\n";
426
427         std::map<std::string, std::string> args;
428
429         args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
430         args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
431         args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());
432
433         std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);
434
435         programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
436 }
437
438 TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
439 {
440         return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
441 }
442
443 class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
444 {
445 public:
446                                                                         IndirectDispatchInstanceBufferGenerate  (Context&                                       context,
447                                                                                                                                                          const std::string&                     name,
448                                                                                                                                                          const deUintptr                        bufferSize,
449                                                                                                                                                          const tcu::UVec3&                      workGroupSize,
450                                                                                                                                                          const DispatchCommandsVec&     dispatchCommands)
451                                                                                 : IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands) {}
452
453         virtual                                                 ~IndirectDispatchInstanceBufferGenerate (void) {}
454
455 protected:
456         virtual void                                    fillIndirectBufferData                                  (const vk::VkCommandBuffer      commandBuffer,
457                                                                                                                                                          const Buffer&                          indirectBuffer);
458
459         vk::Move<vk::VkDescriptorPool>  m_descriptorPool;
460         vk::Move<vk::VkDescriptorSet>   m_descriptorSet;
461         vk::Move<vk::VkPipelineLayout>  m_pipelineLayout;
462         vk::Move<vk::VkPipeline>                m_computePipeline;
463
464 private:
465         IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
466         IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
467 };
468
469 void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
470 {
471         // Create compute shader that generates data for indirect buffer
472         const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
473                 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));
474
475         // Create descriptorSetLayout
476         vk::DescriptorSetLayoutBuilder layoutBuilder;
477         layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
478         vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
479
480         // Create compute pipeline
481         m_pipelineLayout = makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout);
482         m_computePipeline = makeComputePipeline(m_device_interface, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);
483
484         // Create descriptor pool
485         m_descriptorPool = vk::DescriptorPoolBuilder()
486                 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
487                 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
488
489         // Create descriptor set
490         m_descriptorSet = makeDescriptorSet(m_device_interface, m_device, *m_descriptorPool, *descriptorSetLayout);
491
492         const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);
493
494         vk::DescriptorSetUpdateBuilder  descriptorSetBuilder;
495         descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
496         descriptorSetBuilder.update(m_device_interface, m_device);
497
498         const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
499                 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);
500
501         // Bind compute pipeline
502         m_device_interface.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);
503
504         // Bind descriptor set
505         m_device_interface.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
506
507         // Dispatch compute command
508         m_device_interface.cmdDispatch(commandBuffer, 1u, 1u, 1u);
509
510         // Insert memory barrier
511         m_device_interface.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0,
512                                                                                   0, (const vk::VkMemoryBarrier*)DE_NULL,
513                                                                                   1, &bufferBarrier,
514                                                                                   0, (const vk::VkImageMemoryBarrier*)DE_NULL);
515 }
516
517 class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
518 {
519 public:
520                                                         IndirectDispatchCaseBufferGenerate      (tcu::TestContext&                      testCtx,
521                                                                                                                                  const DispatchCaseDesc&        caseDesc,
522                                                                                                                                  const glu::GLSLVersion         glslVersion)
523                                                                 : IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion) {}
524
525         virtual                                 ~IndirectDispatchCaseBufferGenerate     (void) {}
526
527         virtual void                    initPrograms                                            (vk::SourceCollections&         programCollection) const;
528         virtual TestInstance*   createInstance                                          (Context&                                       context) const;
529
530 private:
531         IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
532         IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
533 };
534
535 void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
536 {
537         IndirectDispatchCaseBufferUpload::initPrograms(programCollection);
538
539         const char* const       versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
540
541         std::ostringstream computeBuffer;
542
543         // Header
544         computeBuffer
545                 << versionDecl << "\n"
546                 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
547                 << "layout(set = 0, binding = 0, std430) buffer Out\n"
548                 << "{\n"
549                 << "    highp uint data[];\n"
550                 << "};\n"
551                 << "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
552                 << "{\n"
553                 << "    data[offset+0u] = numWorkGroups.x;\n"
554                 << "    data[offset+1u] = numWorkGroups.y;\n"
555                 << "    data[offset+2u] = numWorkGroups.z;\n"
556                 << "}\n"
557                 << "void main (void)\n"
558                 << "{\n";
559
560         // Dispatch commands
561         for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
562         {
563                 const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
564                 DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);
565
566                 computeBuffer
567                         << "\twriteCmd(" << offs << "u, uvec3("
568                         << cmdIter->m_numWorkGroups.x() << "u, "
569                         << cmdIter->m_numWorkGroups.y() << "u, "
570                         << cmdIter->m_numWorkGroups.z() << "u));\n";
571         }
572
573         // Ending
574         computeBuffer << "}\n";
575
576         std::string computeString = computeBuffer.str();
577
578         programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
579 }
580
581 TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
582 {
583         return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
584 }
585
586 DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
587 {
588         DispatchCommandsVec vec;
589         vec.push_back(cmd);
590         return vec;
591 }
592
593 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
594                                                                  const DispatchCommand& cmd1,
595                                                                  const DispatchCommand& cmd2,
596                                                                  const DispatchCommand& cmd3,
597                                                                  const DispatchCommand& cmd4)
598 {
599         DispatchCommandsVec vec;
600         vec.push_back(cmd0);
601         vec.push_back(cmd1);
602         vec.push_back(cmd2);
603         vec.push_back(cmd3);
604         vec.push_back(cmd4);
605         return vec;
606 }
607
608 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
609                                                                  const DispatchCommand& cmd1,
610                                                                  const DispatchCommand& cmd2,
611                                                                  const DispatchCommand& cmd3,
612                                                                  const DispatchCommand& cmd4,
613                                                                  const DispatchCommand& cmd5,
614                                                                  const DispatchCommand& cmd6)
615 {
616         DispatchCommandsVec vec;
617         vec.push_back(cmd0);
618         vec.push_back(cmd1);
619         vec.push_back(cmd2);
620         vec.push_back(cmd3);
621         vec.push_back(cmd4);
622         vec.push_back(cmd5);
623         vec.push_back(cmd6);
624         return vec;
625 }
626
627 } // anonymous ns
628
629 tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx)
630 {
631         static const DispatchCaseDesc s_dispatchCases[] =
632         {
633                 DispatchCaseDesc("single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
634                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)))
635         ),
636                 DispatchCaseDesc("multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
637                         commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5)))
638                 ),
639                 DispatchCaseDesc("multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
640                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3)))
641                 ),
642                 DispatchCaseDesc("small_offset", "Small offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
643                         commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1)))
644                 ),
645                 DispatchCaseDesc("large_offset", "Large offset", (2 << 20), tcu::UVec3(1, 1, 1),
646                         commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1)))
647                 ),
648                 DispatchCaseDesc("large_offset_multiple_invocations", "Large offset, multiple invocations", (2 << 20), tcu::UVec3(2, 3, 1),
649                         commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3)))
650                 ),
651                 DispatchCaseDesc("empty_command", "Empty command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
652                         commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0)))
653                 ),
654                 DispatchCaseDesc("multi_dispatch", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
655                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
656                                                 DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
657                                                 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
658                                                 DispatchCommand(40, tcu::UVec3(1, 1, 7)),
659                                                 DispatchCommand(52, tcu::UVec3(1, 1, 4)))
660                 ),
661                 DispatchCaseDesc("multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
662                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
663                                                 DispatchCommand(0, tcu::UVec3(1, 1, 1)),
664                                                 DispatchCommand(0, tcu::UVec3(1, 1, 1)),
665                                                 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
666                                                 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
667                                                 DispatchCommand(52, tcu::UVec3(1, 1, 4)),
668                                                 DispatchCommand(52, tcu::UVec3(1, 1, 4)))
669                 ),
670         };
671
672         de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch", "Indirect dispatch tests"));
673
674         tcu::TestCaseGroup* const       groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer", "");
675         indirectComputeDispatchTests->addChild(groupBufferUpload);
676
677         for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
678         {
679                 groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
680         }
681
682         tcu::TestCaseGroup* const       groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute", "");
683         indirectComputeDispatchTests->addChild(groupBufferGenerate);
684
685         for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
686         {
687                 groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
688         }
689
690         return indirectComputeDispatchTests.release();
691 }
692
693 } // compute
694 } // vkt