Fix texture/sampler mapping in sRGB tests am: c8efb45ead am: 984445983d am: bd25f07ba0
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / compute / vktComputeIndirectComputeDispatchTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  * Copyright (c) 2016 The Android Open Source Project
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Indirect Compute Dispatch tests
23  *//*--------------------------------------------------------------------*/
24
25 #include "vktComputeIndirectComputeDispatchTests.hpp"
26 #include "vktComputeTestsUtil.hpp"
27
28 #include <string>
29 #include <map>
30 #include <vector>
31
32 #include "vkDefs.hpp"
33 #include "vkRef.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vktTestCase.hpp"
36 #include "vktTestCaseUtil.hpp"
37 #include "vkPlatform.hpp"
38 #include "vkPrograms.hpp"
39 #include "vkMemUtil.hpp"
40 #include "vkBuilderUtil.hpp"
41 #include "vkQueryUtil.hpp"
42
43 #include "tcuVector.hpp"
44 #include "tcuVectorUtil.hpp"
45 #include "tcuTestLog.hpp"
46 #include "tcuRGBA.hpp"
47 #include "tcuStringTemplate.hpp"
48
49 #include "deUniquePtr.hpp"
50 #include "deSharedPtr.hpp"
51 #include "deStringUtil.hpp"
52 #include "deArrayUtil.hpp"
53
54 #include "gluShaderUtil.hpp"
55
56 namespace vkt
57 {
58 namespace compute
59 {
60 namespace
61 {
62
63 enum
64 {
65         RESULT_BLOCK_BASE_SIZE                  = 4 * (int)sizeof(deUint32), // uvec3 + uint
66         RESULT_BLOCK_NUM_PASSED_OFFSET  = 3 * (int)sizeof(deUint32),
67         INDIRECT_COMMAND_OFFSET                 = 3 * (int)sizeof(deUint32),
68 };
69
70 vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface&        instance_interface,
71                                                                                         const vk::VkPhysicalDevice              physicalDevice,
72                                                                                         const vk::VkDeviceSize                  baseSize)
73 {
74         // TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
75         vk::VkPhysicalDeviceProperties deviceProperties;
76         instance_interface.getPhysicalDeviceProperties(physicalDevice, &deviceProperties);
77         vk::VkDeviceSize alignment = deviceProperties.limits.minStorageBufferOffsetAlignment;
78
79         if (alignment == 0 || (baseSize % alignment == 0))
80                 return baseSize;
81         else
82                 return (baseSize / alignment + 1)*alignment;
83 }
84
85 struct DispatchCommand
86 {
87                                 DispatchCommand (const deIntptr         offset,
88                                                                  const tcu::UVec3&      numWorkGroups)
89                                         : m_offset                      (offset)
90                                         , m_numWorkGroups       (numWorkGroups) {}
91
92         deIntptr        m_offset;
93         tcu::UVec3      m_numWorkGroups;
94 };
95
96 typedef std::vector<DispatchCommand> DispatchCommandsVec;
97
98 struct DispatchCaseDesc
99 {
100                                                                 DispatchCaseDesc (const char*                                   name,
101                                                                                                   const char*                                   description,
102                                                                                                   const deUintptr                               bufferSize,
103                                                                                                   const tcu::UVec3                              workGroupSize,
104                                                                                                   const DispatchCommandsVec&    dispatchCommands)
105                                                                         : m_name                                (name)
106                                                                         , m_description                 (description)
107                                                                         , m_bufferSize                  (bufferSize)
108                                                                         , m_workGroupSize               (workGroupSize)
109                                                                         , m_dispatchCommands    (dispatchCommands) {}
110
111         const char*                                     m_name;
112         const char*                                     m_description;
113         const deUintptr                         m_bufferSize;
114         const tcu::UVec3                        m_workGroupSize;
115         const DispatchCommandsVec       m_dispatchCommands;
116 };
117
118 class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
119 {
120 public:
121                                                                         IndirectDispatchInstanceBufferUpload    (Context&                                       context,
122                                                                                                                                                          const std::string&                     name,
123                                                                                                                                                          const deUintptr                        bufferSize,
124                                                                                                                                                          const tcu::UVec3&                      workGroupSize,
125                                                                                                                                                          const DispatchCommandsVec& dispatchCommands);
126
127         virtual                                                 ~IndirectDispatchInstanceBufferUpload   (void) {}
128
129         virtual tcu::TestStatus                 iterate                                                                 (void);
130
131 protected:
132         virtual void                                    fillIndirectBufferData                                  (const vk::VkCommandBuffer      commandBuffer,
133                                                                                                                                                          const Buffer&                          indirectBuffer);
134
135         deBool                                                  verifyResultBuffer                                              (const Buffer&                          resultBuffer,
136                                                                                                                                                          const vk::VkDeviceSize         resultBlockSize,
137                                                                                                                                                          const vk::VkDeviceSize         resultBufferSize) const;
138
139         Context&                                                m_context;
140         const std::string                               m_name;
141
142         const vk::DeviceInterface&              m_device_interface;
143         const vk::VkDevice                              m_device;
144
145         const vk::VkQueue                               m_queue;
146         const deUint32                                  m_queueFamilyIndex;
147
148         const deUintptr                                 m_bufferSize;
149         const tcu::UVec3                                m_workGroupSize;
150         const DispatchCommandsVec               m_dispatchCommands;
151
152         vk::Allocator&                                  m_allocator;
153
154 private:
155         IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
156         IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
157 };
158
159 IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context&                                    context,
160                                                                                                                                                         const std::string&                      name,
161                                                                                                                                                         const deUintptr                         bufferSize,
162                                                                                                                                                         const tcu::UVec3&                       workGroupSize,
163                                                                                                                                                         const DispatchCommandsVec&      dispatchCommands)
164         : vkt::TestInstance             (context)
165         , m_context                             (context)
166         , m_name                                (name)
167         , m_device_interface    (context.getDeviceInterface())
168         , m_device                              (context.getDevice())
169         , m_queue                               (context.getUniversalQueue())
170         , m_queueFamilyIndex    (context.getUniversalQueueFamilyIndex())
171         , m_bufferSize                  (bufferSize)
172         , m_workGroupSize               (workGroupSize)
173         , m_dispatchCommands    (dispatchCommands)
174         , m_allocator                   (context.getDefaultAllocator())
175 {
176 }
177
178 void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
179 {
180         DE_UNREF(commandBuffer);
181
182         const vk::Allocation& alloc = indirectBuffer.getAllocation();
183         deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
184
185         for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
186         {
187                 DE_ASSERT(cmdIter->m_offset >= 0);
188                 DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
189                 DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);
190
191                 deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];
192
193                 dstPtr[0] = cmdIter->m_numWorkGroups[0];
194                 dstPtr[1] = cmdIter->m_numWorkGroups[1];
195                 dstPtr[2] = cmdIter->m_numWorkGroups[2];
196         }
197
198         vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), m_bufferSize);
199 }
200
201 tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
202 {
203         tcu::TestContext& testCtx = m_context.getTestContext();
204
205         testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << tcu::TestLog::EndMessage;
206         {
207                 tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");
208
209                 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
210                 {
211                         testCtx.getLog()
212                                 << tcu::TestLog::Message
213                                 << cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
214                                 << tcu::TestLog::EndMessage;
215                 }
216         }
217
218         // Create result buffer
219         const vk::VkDeviceSize resultBlockSize = getResultBlockAlignedSize(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
220         const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size();
221
222         Buffer resultBuffer(
223                 m_device_interface, m_device, m_allocator,
224                 makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
225                 vk::MemoryRequirement::HostVisible);
226
227         {
228                 const vk::Allocation& alloc = resultBuffer.getAllocation();
229                 deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
230
231                 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
232                 {
233                         deUint8* const  dstPtr = &resultDataPtr[resultBlockSize*cmdNdx];
234
235                         *(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
236                         *(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
237                         *(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
238                         *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
239                 }
240
241                 vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
242         }
243
244         // Create verify compute shader
245         const vk::Unique<vk::VkShaderModule> verifyShader(createShaderModule(
246                 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"), 0u));
247
248         // Create descriptorSetLayout
249         vk::DescriptorSetLayoutBuilder layoutBuilder;
250         layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
251         vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
252
253         // Create compute pipeline
254         const vk::Unique<vk::VkPipelineLayout> pipelineLayout(makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout));
255         const vk::Unique<vk::VkPipeline> computePipeline(makeComputePipeline(m_device_interface, m_device, *pipelineLayout, *verifyShader));
256
257         // Create descriptor pool
258         const vk::Unique<vk::VkDescriptorPool> descriptorPool(
259                 vk::DescriptorPoolBuilder()
260                 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size())
261                 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));
262
263         const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
264                 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);
265
266         // Create command buffer
267         const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(m_device_interface, m_device, m_queueFamilyIndex));
268         const vk::Unique<vk::VkCommandBuffer> cmdBuffer(allocateCommandBuffer(m_device_interface, m_device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));
269
270         // Begin recording commands
271         beginCommandBuffer(m_device_interface, *cmdBuffer);
272
273         // Create indirect buffer
274         Buffer indirectBuffer(
275                 m_device_interface, m_device, m_allocator,
276                 makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
277                 vk::MemoryRequirement::HostVisible);
278         fillIndirectBufferData(*cmdBuffer, indirectBuffer);
279
280         // Bind compute pipeline
281         m_device_interface.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
282
283         // Allocate descriptor sets
284         typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet> > SharedVkDescriptorSet;
285         std::vector<SharedVkDescriptorSet> descriptorSets(m_dispatchCommands.size());
286
287         vk::VkDeviceSize curOffset = 0;
288
289         // Create descriptor sets
290         for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
291         {
292                 descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>(
293                                                                         makeDescriptorSet(m_device_interface, m_device, *descriptorPool, *descriptorSetLayout)));
294
295                 const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);
296
297                 vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
298                 descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
299                 descriptorSetBuilder.update(m_device_interface, m_device);
300
301                 // Bind descriptor set
302                 m_device_interface.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL);
303
304                 // Dispatch indirect compute command
305                 m_device_interface.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);
306
307                 curOffset += resultBlockSize;
308         }
309
310         // Insert memory barrier
311         m_device_interface.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
312                                                                                   0, (const vk::VkMemoryBarrier*)DE_NULL,
313                                                                                   1, &ssboPostBarrier,
314                                                                                   0, (const vk::VkImageMemoryBarrier*)DE_NULL);
315
316         // End recording commands
317         endCommandBuffer(m_device_interface, *cmdBuffer);
318
319         // Wait for command buffer execution finish
320         submitCommandsAndWait(m_device_interface, m_device, m_queue, *cmdBuffer);
321
322         // Check if result buffer contains valid values
323         if (verifyResultBuffer(resultBuffer, resultBlockSize, resultBufferSize))
324                 return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
325         else
326                 return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
327 }
328
329 deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const Buffer&                  resultBuffer,
330                                                                                                                                  const vk::VkDeviceSize resultBlockSize,
331                                                                                                                                  const vk::VkDeviceSize resultBufferSize) const
332 {
333         deBool allOk = true;
334         const vk::Allocation& alloc = resultBuffer.getAllocation();
335         vk::invalidateMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
336
337         const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
338
339         for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
340         {
341                 const DispatchCommand&  cmd = m_dispatchCommands[cmdNdx];
342                 const deUint8* const    srcPtr = (const deUint8*)resultDataPtr + cmdNdx*resultBlockSize;
343                 const deUint32                  numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
344                 const deUint32                  numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
345                 const deUint32                  numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
346                 const deUint32                  expectedCount = numInvocationsPerGroup * numGroups;
347
348                 if (numPassed != expectedCount)
349                 {
350                         tcu::TestContext& testCtx = m_context.getTestContext();
351
352                         testCtx.getLog()
353                                 << tcu::TestLog::Message
354                                 << "ERROR: got invalid result for invocation " << cmdNdx
355                                 << ": got numPassed = " << numPassed << ", expected " << expectedCount
356                                 << tcu::TestLog::EndMessage;
357
358                         allOk = false;
359                 }
360         }
361
362         return allOk;
363 }
364
365 class IndirectDispatchCaseBufferUpload : public vkt::TestCase
366 {
367 public:
368                                                                 IndirectDispatchCaseBufferUpload        (tcu::TestContext&                      testCtx,
369                                                                                                                                          const DispatchCaseDesc&        caseDesc,
370                                                                                                                                          const glu::GLSLVersion         glslVersion);
371
372         virtual                                         ~IndirectDispatchCaseBufferUpload       (void) {}
373
374         virtual void                            initPrograms                                            (vk::SourceCollections&         programCollection) const;
375         virtual TestInstance*           createInstance                                          (Context&                                       context) const;
376
377 protected:
378         const deUintptr                         m_bufferSize;
379         const tcu::UVec3                        m_workGroupSize;
380         const DispatchCommandsVec       m_dispatchCommands;
381         const glu::GLSLVersion          m_glslVersion;
382
383 private:
384         IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
385         IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
386 };
387
388 IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext&           testCtx,
389                                                                                                                                         const DispatchCaseDesc& caseDesc,
390                                                                                                                                         const glu::GLSLVersion  glslVersion)
391         : vkt::TestCase                 (testCtx, caseDesc.m_name, caseDesc.m_description)
392         , m_bufferSize                  (caseDesc.m_bufferSize)
393         , m_workGroupSize               (caseDesc.m_workGroupSize)
394         , m_dispatchCommands    (caseDesc.m_dispatchCommands)
395         , m_glslVersion                 (glslVersion)
396 {
397 }
398
399 void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
400 {
401         const char* const       versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
402
403         std::ostringstream      verifyBuffer;
404
405         verifyBuffer
406                 << versionDecl << "\n"
407                 << "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
408                 << "layout(set = 0, binding = 0, std430) buffer Result\n"
409                 << "{\n"
410                 << "    uvec3           expectedGroupCount;\n"
411                 << "    coherent uint   numPassed;\n"
412                 << "} result;\n"
413                 << "void main (void)\n"
414                 << "{\n"
415                 << "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
416                 << "        atomicAdd(result.numPassed, 1u);\n"
417                 << "}\n";
418
419         std::map<std::string, std::string> args;
420
421         args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
422         args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
423         args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());
424
425         std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);
426
427         programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
428 }
429
430 TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
431 {
432         return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
433 }
434
435 class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
436 {
437 public:
438                                                                         IndirectDispatchInstanceBufferGenerate  (Context&                                       context,
439                                                                                                                                                          const std::string&                     name,
440                                                                                                                                                          const deUintptr                        bufferSize,
441                                                                                                                                                          const tcu::UVec3&                      workGroupSize,
442                                                                                                                                                          const DispatchCommandsVec&     dispatchCommands)
443                                                                                 : IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands) {}
444
445         virtual                                                 ~IndirectDispatchInstanceBufferGenerate (void) {}
446
447 protected:
448         virtual void                                    fillIndirectBufferData                                  (const vk::VkCommandBuffer      commandBuffer,
449                                                                                                                                                          const Buffer&                          indirectBuffer);
450
451         vk::Move<vk::VkDescriptorPool>  m_descriptorPool;
452         vk::Move<vk::VkDescriptorSet>   m_descriptorSet;
453         vk::Move<vk::VkPipelineLayout>  m_pipelineLayout;
454         vk::Move<vk::VkPipeline>                m_computePipeline;
455
456 private:
457         IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
458         IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
459 };
460
461 void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
462 {
463         // Create compute shader that generates data for indirect buffer
464         const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
465                 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));
466
467         // Create descriptorSetLayout
468         vk::DescriptorSetLayoutBuilder layoutBuilder;
469         layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
470         vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
471
472         // Create compute pipeline
473         m_pipelineLayout = makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout);
474         m_computePipeline = makeComputePipeline(m_device_interface, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);
475
476         // Create descriptor pool
477         m_descriptorPool = vk::DescriptorPoolBuilder()
478                 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
479                 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
480
481         // Create descriptor set
482         m_descriptorSet = makeDescriptorSet(m_device_interface, m_device, *m_descriptorPool, *descriptorSetLayout);
483
484         const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);
485
486         vk::DescriptorSetUpdateBuilder  descriptorSetBuilder;
487         descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
488         descriptorSetBuilder.update(m_device_interface, m_device);
489
490         const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
491                 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);
492
493         // Bind compute pipeline
494         m_device_interface.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);
495
496         // Bind descriptor set
497         m_device_interface.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
498
499         // Dispatch compute command
500         m_device_interface.cmdDispatch(commandBuffer, 1u, 1u, 1u);
501
502         // Insert memory barrier
503         m_device_interface.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0,
504                                                                                   0, (const vk::VkMemoryBarrier*)DE_NULL,
505                                                                                   1, &bufferBarrier,
506                                                                                   0, (const vk::VkImageMemoryBarrier*)DE_NULL);
507 }
508
509 class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
510 {
511 public:
512                                                         IndirectDispatchCaseBufferGenerate      (tcu::TestContext&                      testCtx,
513                                                                                                                                  const DispatchCaseDesc&        caseDesc,
514                                                                                                                                  const glu::GLSLVersion         glslVersion)
515                                                                 : IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion) {}
516
517         virtual                                 ~IndirectDispatchCaseBufferGenerate     (void) {}
518
519         virtual void                    initPrograms                                            (vk::SourceCollections&         programCollection) const;
520         virtual TestInstance*   createInstance                                          (Context&                                       context) const;
521
522 private:
523         IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
524         IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
525 };
526
527 void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
528 {
529         IndirectDispatchCaseBufferUpload::initPrograms(programCollection);
530
531         const char* const       versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
532
533         std::ostringstream computeBuffer;
534
535         // Header
536         computeBuffer
537                 << versionDecl << "\n"
538                 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
539                 << "layout(set = 0, binding = 0, std430) buffer Out\n"
540                 << "{\n"
541                 << "    highp uint data[];\n"
542                 << "};\n"
543                 << "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
544                 << "{\n"
545                 << "    data[offset+0u] = numWorkGroups.x;\n"
546                 << "    data[offset+1u] = numWorkGroups.y;\n"
547                 << "    data[offset+2u] = numWorkGroups.z;\n"
548                 << "}\n"
549                 << "void main (void)\n"
550                 << "{\n";
551
552         // Dispatch commands
553         for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
554         {
555                 const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
556                 DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);
557
558                 computeBuffer
559                         << "\twriteCmd(" << offs << "u, uvec3("
560                         << cmdIter->m_numWorkGroups.x() << "u, "
561                         << cmdIter->m_numWorkGroups.y() << "u, "
562                         << cmdIter->m_numWorkGroups.z() << "u));\n";
563         }
564
565         // Ending
566         computeBuffer << "}\n";
567
568         std::string computeString = computeBuffer.str();
569
570         programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
571 }
572
573 TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
574 {
575         return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
576 }
577
578 DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
579 {
580         DispatchCommandsVec vec;
581         vec.push_back(cmd);
582         return vec;
583 }
584
585 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
586                                                                  const DispatchCommand& cmd1,
587                                                                  const DispatchCommand& cmd2,
588                                                                  const DispatchCommand& cmd3,
589                                                                  const DispatchCommand& cmd4)
590 {
591         DispatchCommandsVec vec;
592         vec.push_back(cmd0);
593         vec.push_back(cmd1);
594         vec.push_back(cmd2);
595         vec.push_back(cmd3);
596         vec.push_back(cmd4);
597         return vec;
598 }
599
600 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
601                                                                  const DispatchCommand& cmd1,
602                                                                  const DispatchCommand& cmd2,
603                                                                  const DispatchCommand& cmd3,
604                                                                  const DispatchCommand& cmd4,
605                                                                  const DispatchCommand& cmd5,
606                                                                  const DispatchCommand& cmd6)
607 {
608         DispatchCommandsVec vec;
609         vec.push_back(cmd0);
610         vec.push_back(cmd1);
611         vec.push_back(cmd2);
612         vec.push_back(cmd3);
613         vec.push_back(cmd4);
614         vec.push_back(cmd5);
615         vec.push_back(cmd6);
616         return vec;
617 }
618
619 } // anonymous ns
620
621 tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx)
622 {
623         static const DispatchCaseDesc s_dispatchCases[] =
624         {
625                 DispatchCaseDesc("single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
626                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)))
627         ),
628                 DispatchCaseDesc("multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
629                         commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5)))
630                 ),
631                 DispatchCaseDesc("multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
632                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3)))
633                 ),
634                 DispatchCaseDesc("small_offset", "Small offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
635                         commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1)))
636                 ),
637                 DispatchCaseDesc("large_offset", "Large offset", (2 << 20), tcu::UVec3(1, 1, 1),
638                         commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1)))
639                 ),
640                 DispatchCaseDesc("large_offset_multiple_invocations", "Large offset, multiple invocations", (2 << 20), tcu::UVec3(2, 3, 1),
641                         commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3)))
642                 ),
643                 DispatchCaseDesc("empty_command", "Empty command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
644                         commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0)))
645                 ),
646                 DispatchCaseDesc("multi_dispatch", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
647                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
648                                                 DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
649                                                 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
650                                                 DispatchCommand(40, tcu::UVec3(1, 1, 7)),
651                                                 DispatchCommand(52, tcu::UVec3(1, 1, 4)))
652                 ),
653                 DispatchCaseDesc("multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
654                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
655                                                 DispatchCommand(0, tcu::UVec3(1, 1, 1)),
656                                                 DispatchCommand(0, tcu::UVec3(1, 1, 1)),
657                                                 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
658                                                 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
659                                                 DispatchCommand(52, tcu::UVec3(1, 1, 4)),
660                                                 DispatchCommand(52, tcu::UVec3(1, 1, 4)))
661                 ),
662         };
663
664         de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch", "Indirect dispatch tests"));
665
666         tcu::TestCaseGroup* const       groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer", "");
667         indirectComputeDispatchTests->addChild(groupBufferUpload);
668
669         for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
670         {
671                 groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
672         }
673
674         tcu::TestCaseGroup* const       groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute", "");
675         indirectComputeDispatchTests->addChild(groupBufferGenerate);
676
677         for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
678         {
679                 groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
680         }
681
682         return indirectComputeDispatchTests.release();
683 }
684
685 } // compute
686 } // vkt