Manual merge of AOSP change 197338
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / compute / vktComputeBasicComputeShaderTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 Google Inc.
6  * Copyright (c) 2015 Mobica Ltd.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and/or associated documentation files (the
10  * "Materials"), to deal in the Materials without restriction, including
11  * without limitation the rights to use, copy, modify, merge, publish,
12  * distribute, sublicense, and/or sell copies of the Materials, and to
13  * permit persons to whom the Materials are furnished to do so, subject to
14  * the following conditions:
15  *
16  * The above copyright notice(s) and this permission notice shall be included
17  * in all copies or substantial portions of the Materials.
18  *
19  * The Materials are Confidential Information as defined by the
20  * Khronos Membership Agreement until designated non-confidential by Khronos,
21  * at which point this condition clause shall be removed.
22  *
23  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
26  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
27  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
28  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
29  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
30  *
31  *//*!
32  * \file
33  * \brief Compute Shader Tests
34  *//*--------------------------------------------------------------------*/
35
36 #include "vktComputeBasicComputeShaderTests.hpp"
37 #include "vktTestCase.hpp"
38 #include "vktTestCaseUtil.hpp"
39 #include "vktComputeTestsUtil.hpp"
40
41 #include "vkDefs.hpp"
42 #include "vkRef.hpp"
43 #include "vkRefUtil.hpp"
44 #include "vkPlatform.hpp"
45 #include "vkPrograms.hpp"
46 #include "vkRefUtil.hpp"
47 #include "vkMemUtil.hpp"
48 #include "vkQueryUtil.hpp"
49 #include "vkBuilderUtil.hpp"
50 #include "vkTypeUtil.hpp"
51
52 #include "deStringUtil.hpp"
53 #include "deUniquePtr.hpp"
54 #include "deRandom.hpp"
55
56 #include <vector>
57
58 using namespace vk;
59
60 namespace vkt
61 {
62 namespace compute
63 {
64 namespace
65 {
66
67 template<typename T, int size>
68 T multiplyComponents (const tcu::Vector<T, size>& v)
69 {
70         T accum = 1;
71         for (int i = 0; i < size; ++i)
72                 accum *= v[i];
73         return accum;
74 }
75
//! Returns \p a multiplied by itself.
template<typename T>
inline T squared (const T& a)
{
	T result = a * a;
	return result;
}
81
82 inline VkImageCreateInfo make2DImageCreateInfo (const tcu::IVec2& imageSize, const VkImageUsageFlags usage)
83 {
84         const VkImageCreateInfo imageParams =
85         {
86                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                            // VkStructureType                      sType;
87                 DE_NULL,                                                                                        // const void*                          pNext;
88                 0u,                                                                                                     // VkImageCreateFlags           flags;
89                 VK_IMAGE_TYPE_2D,                                                                       // VkImageType                          imageType;
90                 VK_FORMAT_R32_UINT,                                                                     // VkFormat                                     format;
91                 vk::makeExtent3D(imageSize.x(), imageSize.y(), 1),      // VkExtent3D                           extent;
92                 1u,                                                                                                     // deUint32                                     mipLevels;
93                 1u,                                                                                                     // deUint32                                     arrayLayers;
94                 VK_SAMPLE_COUNT_1_BIT,                                                          // VkSampleCountFlagBits        samples;
95                 VK_IMAGE_TILING_OPTIMAL,                                                        // VkImageTiling                        tiling;
96                 usage,                                                                                          // VkImageUsageFlags            usage;
97                 VK_SHARING_MODE_EXCLUSIVE,                                                      // VkSharingMode                        sharingMode;
98                 0u,                                                                                                     // deUint32                                     queueFamilyIndexCount;
99                 DE_NULL,                                                                                        // const deUint32*                      pQueueFamilyIndices;
100                 VK_IMAGE_LAYOUT_UNDEFINED,                                                      // VkImageLayout                        initialLayout;
101         };
102         return imageParams;
103 }
104
105 inline VkBufferImageCopy makeBufferImageCopy(const tcu::IVec2& imageSize)
106 {
107         return compute::makeBufferImageCopy(vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), 1u);
108 }
109
//! Buffer kind selector. The code that consumes this enum is outside this section;
//! presumably it chooses between a uniform buffer and a shader storage buffer - TODO confirm.
enum BufferType
{
	BUFFER_TYPE_UNIFORM = 0,
	BUFFER_TYPE_SSBO,
};
115
116 class SharedVarTest : public vkt::TestCase
117 {
118 public:
119                                                 SharedVarTest   (tcu::TestContext&              testCtx,
120                                                                                  const std::string&             name,
121                                                                                  const std::string&             description,
122                                                                                  const tcu::IVec3&              localSize,
123                                                                                  const tcu::IVec3&              workSize);
124
125         void                            initPrograms    (SourceCollections&             sourceCollections) const;
126         TestInstance*           createInstance  (Context&                               context) const;
127
128 private:
129         const tcu::IVec3        m_localSize;
130         const tcu::IVec3        m_workSize;
131 };
132
133 class SharedVarTestInstance : public vkt::TestInstance
134 {
135 public:
136                                                                         SharedVarTestInstance   (Context&                       context,
137                                                                                                                          const tcu::IVec3&      localSize,
138                                                                                                                          const tcu::IVec3&      workSize);
139
140         tcu::TestStatus                                 iterate                                 (void);
141
142 private:
143         const tcu::IVec3                                m_localSize;
144         const tcu::IVec3                                m_workSize;
145 };
146
147 SharedVarTest::SharedVarTest (tcu::TestContext&         testCtx,
148                                                           const std::string&    name,
149                                                           const std::string&    description,
150                                                           const tcu::IVec3&             localSize,
151                                                           const tcu::IVec3&             workSize)
152         : TestCase              (testCtx, name, description)
153         , m_localSize   (localSize)
154         , m_workSize    (workSize)
155 {
156 }
157
158 void SharedVarTest::initPrograms (SourceCollections& sourceCollections) const
159 {
160         const int workGroupSize = multiplyComponents(m_localSize);
161         const int workGroupCount = multiplyComponents(m_workSize);
162         const int numValues = workGroupSize * workGroupCount;
163
164         std::ostringstream src;
165         src << "#version 310 es\n"
166                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
167                 << "layout(binding = 0) writeonly buffer Output {\n"
168                 << "    uint values[" << numValues << "];\n"
169                 << "} sb_out;\n\n"
170                 << "shared uint offsets[" << workGroupSize << "];\n\n"
171                 << "void main (void) {\n"
172                 << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
173                 << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
174                 << "    uint globalOffs = localSize*globalNdx;\n"
175                 << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
176                 << "\n"
177                 << "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
178                 << "    memoryBarrierShared();\n"
179                 << "    barrier();\n"
180                 << "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
181                 << "}\n";
182
183         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
184 }
185
186 TestInstance* SharedVarTest::createInstance (Context& context) const
187 {
188         return new SharedVarTestInstance(context, m_localSize, m_workSize);
189 }
190
191 SharedVarTestInstance::SharedVarTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
192         : TestInstance  (context)
193         , m_localSize   (localSize)
194         , m_workSize    (workSize)
195 {
196 }
197
198 tcu::TestStatus SharedVarTestInstance::iterate (void)
199 {
200         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
201         const VkDevice                  device                          = m_context.getDevice();
202         const VkQueue                   queue                           = m_context.getUniversalQueue();
203         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
204         Allocator&                              allocator                       = m_context.getDefaultAllocator();
205
206         const int workGroupSize = multiplyComponents(m_localSize);
207         const int workGroupCount = multiplyComponents(m_workSize);
208
209         // Create a buffer and host-visible memory for it
210
211         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
212         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
213
214         // Create descriptor set
215
216         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
217                 DescriptorSetLayoutBuilder()
218                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
219                 .build(vk, device));
220
221         const Unique<VkDescriptorPool> descriptorPool(
222                 DescriptorPoolBuilder()
223                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
224                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
225
226         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
227
228         const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
229         DescriptorSetUpdateBuilder()
230                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
231                 .update(vk, device);
232
233         // Perform the computation
234
235         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
236         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
237         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
238
239         const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
240
241         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
242         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
243
244         // Start recording commands
245
246         beginCommandBuffer(vk, *cmdBuffer);
247
248         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
249         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
250
251         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
252
253         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
254
255         endCommandBuffer(vk, *cmdBuffer);
256
257         // Wait for completion
258
259         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
260
261         // Validate the results
262
263         const Allocation& bufferAllocation = buffer.getAllocation();
264         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
265
266         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
267
268         for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
269         {
270                 const int globalOffset = groupNdx * workGroupSize;
271                 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
272                 {
273                         const deUint32 res = bufferPtr[globalOffset + localOffset];
274                         const deUint32 ref = globalOffset + squared(workGroupSize - localOffset - 1);
275
276                         if (res != ref)
277                         {
278                                 std::ostringstream msg;
279                                 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
280                                 return tcu::TestStatus::fail(msg.str());
281                         }
282                 }
283         }
284         return tcu::TestStatus::pass("Compute succeeded");
285 }
286
287 class SharedVarAtomicOpTest : public vkt::TestCase
288 {
289 public:
290                                                 SharedVarAtomicOpTest   (tcu::TestContext&      testCtx,
291                                                                                                  const std::string&     name,
292                                                                                                  const std::string&     description,
293                                                                                                  const tcu::IVec3&      localSize,
294                                                                                                  const tcu::IVec3&      workSize);
295
296         void                            initPrograms                    (SourceCollections& sourceCollections) const;
297         TestInstance*           createInstance                  (Context&                       context) const;
298
299 private:
300         const tcu::IVec3        m_localSize;
301         const tcu::IVec3        m_workSize;
302 };
303
304 class SharedVarAtomicOpTestInstance : public vkt::TestInstance
305 {
306 public:
307                                                                         SharedVarAtomicOpTestInstance   (Context&                       context,
308                                                                                                                                          const tcu::IVec3&      localSize,
309                                                                                                                                          const tcu::IVec3&      workSize);
310
311         tcu::TestStatus                                 iterate                                                 (void);
312
313 private:
314         const tcu::IVec3                                m_localSize;
315         const tcu::IVec3                                m_workSize;
316 };
317
318 SharedVarAtomicOpTest::SharedVarAtomicOpTest (tcu::TestContext&         testCtx,
319                                                                                           const std::string&    name,
320                                                                                           const std::string&    description,
321                                                                                           const tcu::IVec3&             localSize,
322                                                                                           const tcu::IVec3&             workSize)
323         : TestCase              (testCtx, name, description)
324         , m_localSize   (localSize)
325         , m_workSize    (workSize)
326 {
327 }
328
329 void SharedVarAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
330 {
331         const int workGroupSize = multiplyComponents(m_localSize);
332         const int workGroupCount = multiplyComponents(m_workSize);
333         const int numValues = workGroupSize * workGroupCount;
334
335         std::ostringstream src;
336         src << "#version 310 es\n"
337                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
338                 << "layout(binding = 0) writeonly buffer Output {\n"
339                 << "    uint values[" << numValues << "];\n"
340                 << "} sb_out;\n\n"
341                 << "shared uint count;\n\n"
342                 << "void main (void) {\n"
343                 << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
344                 << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
345                 << "    uint globalOffs = localSize*globalNdx;\n"
346                 << "\n"
347                 << "    count = 0u;\n"
348                 << "    memoryBarrierShared();\n"
349                 << "    barrier();\n"
350                 << "    uint oldVal = atomicAdd(count, 1u);\n"
351                 << "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
352                 << "}\n";
353
354         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
355 }
356
357 TestInstance* SharedVarAtomicOpTest::createInstance (Context& context) const
358 {
359         return new SharedVarAtomicOpTestInstance(context, m_localSize, m_workSize);
360 }
361
362 SharedVarAtomicOpTestInstance::SharedVarAtomicOpTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
363         : TestInstance  (context)
364         , m_localSize   (localSize)
365         , m_workSize    (workSize)
366 {
367 }
368
369 tcu::TestStatus SharedVarAtomicOpTestInstance::iterate (void)
370 {
371         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
372         const VkDevice                  device                          = m_context.getDevice();
373         const VkQueue                   queue                           = m_context.getUniversalQueue();
374         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
375         Allocator&                              allocator                       = m_context.getDefaultAllocator();
376
377         const int workGroupSize = multiplyComponents(m_localSize);
378         const int workGroupCount = multiplyComponents(m_workSize);
379
380         // Create a buffer and host-visible memory for it
381
382         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
383         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
384
385         // Create descriptor set
386
387         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
388                 DescriptorSetLayoutBuilder()
389                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
390                 .build(vk, device));
391
392         const Unique<VkDescriptorPool> descriptorPool(
393                 DescriptorPoolBuilder()
394                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
395                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
396
397         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
398
399         const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
400         DescriptorSetUpdateBuilder()
401                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
402                 .update(vk, device);
403
404         // Perform the computation
405
406         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
407         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
408         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
409
410         const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
411
412         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
413         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
414
415         // Start recording commands
416
417         beginCommandBuffer(vk, *cmdBuffer);
418
419         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
420         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
421
422         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
423
424         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1u, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
425
426         endCommandBuffer(vk, *cmdBuffer);
427
428         // Wait for completion
429
430         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
431
432         // Validate the results
433
434         const Allocation& bufferAllocation = buffer.getAllocation();
435         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
436
437         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
438
439         for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
440         {
441                 const int globalOffset = groupNdx * workGroupSize;
442                 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
443                 {
444                         const deUint32 res = bufferPtr[globalOffset + localOffset];
445                         const deUint32 ref = localOffset + 1;
446
447                         if (res != ref)
448                         {
449                                 std::ostringstream msg;
450                                 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
451                                 return tcu::TestStatus::fail(msg.str());
452                         }
453                 }
454         }
455         return tcu::TestStatus::pass("Compute succeeded");
456 }
457
458 class SSBOLocalBarrierTest : public vkt::TestCase
459 {
460 public:
461                                                 SSBOLocalBarrierTest    (tcu::TestContext&      testCtx,
462                                                                                                  const std::string& name,
463                                                                                                  const std::string&     description,
464                                                                                                  const tcu::IVec3&      localSize,
465                                                                                                  const tcu::IVec3&      workSize);
466
467         void                            initPrograms                    (SourceCollections& sourceCollections) const;
468         TestInstance*           createInstance                  (Context&                       context) const;
469
470 private:
471         const tcu::IVec3        m_localSize;
472         const tcu::IVec3        m_workSize;
473 };
474
475 class SSBOLocalBarrierTestInstance : public vkt::TestInstance
476 {
477 public:
478                                                                         SSBOLocalBarrierTestInstance    (Context&                       context,
479                                                                                                                                          const tcu::IVec3&      localSize,
480                                                                                                                                          const tcu::IVec3&      workSize);
481
482         tcu::TestStatus                                 iterate                                                 (void);
483
484 private:
485         const tcu::IVec3                                m_localSize;
486         const tcu::IVec3                                m_workSize;
487 };
488
489 SSBOLocalBarrierTest::SSBOLocalBarrierTest (tcu::TestContext&   testCtx,
490                                                                                         const std::string&      name,
491                                                                                         const std::string&      description,
492                                                                                         const tcu::IVec3&       localSize,
493                                                                                         const tcu::IVec3&       workSize)
494         : TestCase              (testCtx, name, description)
495         , m_localSize   (localSize)
496         , m_workSize    (workSize)
497 {
498 }
499
500 void SSBOLocalBarrierTest::initPrograms (SourceCollections& sourceCollections) const
501 {
502         const int workGroupSize = multiplyComponents(m_localSize);
503         const int workGroupCount = multiplyComponents(m_workSize);
504         const int numValues = workGroupSize * workGroupCount;
505
506         std::ostringstream src;
507         src << "#version 310 es\n"
508                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
509                 << "layout(binding = 0) coherent buffer Output {\n"
510                 << "    uint values[" << numValues << "];\n"
511                 << "} sb_out;\n\n"
512                 << "void main (void) {\n"
513                 << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
514                 << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
515                 << "    uint globalOffs = localSize*globalNdx;\n"
516                 << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
517                 << "\n"
518                 << "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
519                 << "    memoryBarrierBuffer();\n"
520                 << "    barrier();\n"
521                 << "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n"         // += so we read and write
522                 << "    memoryBarrierBuffer();\n"
523                 << "    barrier();\n"
524                 << "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
525                 << "}\n";
526
527         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
528 }
529
530 TestInstance* SSBOLocalBarrierTest::createInstance (Context& context) const
531 {
532         return new SSBOLocalBarrierTestInstance(context, m_localSize, m_workSize);
533 }
534
535 SSBOLocalBarrierTestInstance::SSBOLocalBarrierTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
536         : TestInstance  (context)
537         , m_localSize   (localSize)
538         , m_workSize    (workSize)
539 {
540 }
541
542 tcu::TestStatus SSBOLocalBarrierTestInstance::iterate (void)
543 {
544         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
545         const VkDevice                  device                          = m_context.getDevice();
546         const VkQueue                   queue                           = m_context.getUniversalQueue();
547         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
548         Allocator&                              allocator                       = m_context.getDefaultAllocator();
549
550         const int workGroupSize = multiplyComponents(m_localSize);
551         const int workGroupCount = multiplyComponents(m_workSize);
552
553         // Create a buffer and host-visible memory for it
554
555         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
556         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
557
558         // Create descriptor set
559
560         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
561                 DescriptorSetLayoutBuilder()
562                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
563                 .build(vk, device));
564
565         const Unique<VkDescriptorPool> descriptorPool(
566                 DescriptorPoolBuilder()
567                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
568                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
569
570         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
571
572         const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
573         DescriptorSetUpdateBuilder()
574                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
575                 .update(vk, device);
576
577         // Perform the computation
578
579         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
580         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
581         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
582
583         const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
584
585         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
586         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
587
588         // Start recording commands
589
590         beginCommandBuffer(vk, *cmdBuffer);
591
592         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
593         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
594
595         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
596
597         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
598
599         endCommandBuffer(vk, *cmdBuffer);
600
601         // Wait for completion
602
603         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
604
605         // Validate the results
606
607         const Allocation& bufferAllocation = buffer.getAllocation();
608         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
609
610         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
611
612         for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
613         {
614                 const int globalOffset = groupNdx * workGroupSize;
615                 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
616                 {
617                         const deUint32  res             = bufferPtr[globalOffset + localOffset];
618                         const int               offs0   = localOffset - 1 < 0 ? ((localOffset + workGroupSize - 1) % workGroupSize) : ((localOffset - 1) % workGroupSize);
619                         const int               offs1   = localOffset - 2 < 0 ? ((localOffset + workGroupSize - 2) % workGroupSize) : ((localOffset - 2) % workGroupSize);
620                         const deUint32  ref             = static_cast<deUint32>(globalOffset + offs0 + offs1);
621
622                         if (res != ref)
623                         {
624                                 std::ostringstream msg;
625                                 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
626                                 return tcu::TestStatus::fail(msg.str());
627                         }
628                 }
629         }
630         return tcu::TestStatus::pass("Compute succeeded");
631 }
632
633 class CopyImageToSSBOTest : public vkt::TestCase
634 {
635 public:
636                                                 CopyImageToSSBOTest             (tcu::TestContext&      testCtx,
637                                                                                                  const std::string&     name,
638                                                                                                  const std::string&     description,
639                                                                                                  const tcu::IVec2&      localSize,
640                                                                                                  const tcu::IVec2&      imageSize);
641
642         void                            initPrograms                    (SourceCollections& sourceCollections) const;
643         TestInstance*           createInstance                  (Context&                       context) const;
644
645 private:
646         const tcu::IVec2        m_localSize;
647         const tcu::IVec2        m_imageSize;
648 };
649
650 class CopyImageToSSBOTestInstance : public vkt::TestInstance
651 {
652 public:
653                                                                         CopyImageToSSBOTestInstance             (Context&                       context,
654                                                                                                                                          const tcu::IVec2&      localSize,
655                                                                                                                                          const tcu::IVec2&      imageSize);
656
657         tcu::TestStatus                                 iterate                                                 (void);
658
659 private:
660         const tcu::IVec2                                m_localSize;
661         const tcu::IVec2                                m_imageSize;
662 };
663
664 CopyImageToSSBOTest::CopyImageToSSBOTest (tcu::TestContext&             testCtx,
665                                                                                   const std::string&    name,
666                                                                                   const std::string&    description,
667                                                                                   const tcu::IVec2&             localSize,
668                                                                                   const tcu::IVec2&             imageSize)
669         : TestCase              (testCtx, name, description)
670         , m_localSize   (localSize)
671         , m_imageSize   (imageSize)
672 {
673         DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
674         DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
675 }
676
677 void CopyImageToSSBOTest::initPrograms (SourceCollections& sourceCollections) const
678 {
679         std::ostringstream src;
680         src << "#version 310 es\n"
681                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
682                 << "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_srcImg;\n"
683                 << "layout(binding = 0) writeonly buffer Output {\n"
684                 << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
685                 << "} sb_out;\n\n"
686                 << "void main (void) {\n"
687                 << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
688                 << "    uint value  = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
689                 << "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
690                 << "}\n";
691
692         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
693 }
694
695 TestInstance* CopyImageToSSBOTest::createInstance (Context& context) const
696 {
697         return new CopyImageToSSBOTestInstance(context, m_localSize, m_imageSize);
698 }
699
700 CopyImageToSSBOTestInstance::CopyImageToSSBOTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
701         : TestInstance  (context)
702         , m_localSize   (localSize)
703         , m_imageSize   (imageSize)
704 {
705 }
706
707 tcu::TestStatus CopyImageToSSBOTestInstance::iterate (void)
708 {
709         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
710         const VkDevice                  device                          = m_context.getDevice();
711         const VkQueue                   queue                           = m_context.getUniversalQueue();
712         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
713         Allocator&                              allocator                       = m_context.getDefaultAllocator();
714
715         // Create an image
716
717         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
718         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
719
720         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
721         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
722
723         // Staging buffer (source data for image)
724
725         const deUint32 imageArea = multiplyComponents(m_imageSize);
726         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
727
728         const Buffer stagingBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
729
730         // Populate the staging buffer with test data
731         {
732                 de::Random rnd(0xab2c7);
733                 const Allocation& stagingBufferAllocation = stagingBuffer.getAllocation();
734                 deUint32* bufferPtr = static_cast<deUint32*>(stagingBufferAllocation.getHostPtr());
735                 for (deUint32 i = 0; i < imageArea; ++i)
736                         *bufferPtr++ = rnd.getUint32();
737
738                 flushMappedMemoryRange(vk, device, stagingBufferAllocation.getMemory(), stagingBufferAllocation.getOffset(), bufferSizeBytes);
739         }
740
741         // Create a buffer to store shader output
742
743         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
744
745         // Create descriptor set
746
747         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
748                 DescriptorSetLayoutBuilder()
749                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
750                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
751                 .build(vk, device));
752
753         const Unique<VkDescriptorPool> descriptorPool(
754                 DescriptorPoolBuilder()
755                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
756                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
757                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
758
759         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
760
761         // Set the bindings
762
763         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
764         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
765
766         DescriptorSetUpdateBuilder()
767                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
768                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
769                 .update(vk, device);
770
771         // Perform the computation
772         {
773                 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
774                 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
775                 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
776
777                 const VkBufferMemoryBarrier stagingBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *stagingBuffer, 0ull, bufferSizeBytes);
778
779                 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
780                         0u, 0u,
781                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
782                         *image, subresourceRange);
783
784                 const VkImageMemoryBarrier imagePostCopyBarrier = makeImageMemoryBarrier(
785                         VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
786                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
787                         *image, subresourceRange);
788
789                 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
790
791                 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
792                 const tcu::IVec2 workSize = m_imageSize / m_localSize;
793
794                 // Prepare the command buffer
795
796                 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
797                 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
798
799                 // Start recording commands
800
801                 beginCommandBuffer(vk, *cmdBuffer);
802
803                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
804                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
805
806                 vk.cmdPipelineBarrier(*cmdBuffer, 0u, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &stagingBufferPostHostWriteBarrier, 1, &imagePreCopyBarrier);
807                 vk.cmdCopyBufferToImage(*cmdBuffer, *stagingBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &copyParams);
808                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePostCopyBarrier);
809
810                 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
811                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
812
813                 endCommandBuffer(vk, *cmdBuffer);
814
815                 // Wait for completion
816
817                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
818         }
819
820         // Validate the results
821
822         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
823         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
824
825         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
826         const deUint32* refBufferPtr = static_cast<deUint32*>(stagingBuffer.getAllocation().getHostPtr());
827
828         for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
829         {
830                 const deUint32 res = *(bufferPtr + ndx);
831                 const deUint32 ref = *(refBufferPtr + ndx);
832
833                 if (res != ref)
834                 {
835                         std::ostringstream msg;
836                         msg << "Comparison failed for Output.values[" << ndx << "]";
837                         return tcu::TestStatus::fail(msg.str());
838                 }
839         }
840         return tcu::TestStatus::pass("Compute succeeded");
841 }
842
843 class CopySSBOToImageTest : public vkt::TestCase
844 {
845 public:
846                                                 CopySSBOToImageTest     (tcu::TestContext&      testCtx,
847                                                                                          const std::string&     name,
848                                                                                          const std::string&     description,
849                                                                                          const tcu::IVec2&      localSize,
850                                                                                          const tcu::IVec2&      imageSize);
851
852         void                            initPrograms            (SourceCollections& sourceCollections) const;
853         TestInstance*           createInstance          (Context&                       context) const;
854
855 private:
856         const tcu::IVec2        m_localSize;
857         const tcu::IVec2        m_imageSize;
858 };
859
860 class CopySSBOToImageTestInstance : public vkt::TestInstance
861 {
862 public:
863                                                                         CopySSBOToImageTestInstance     (Context&                       context,
864                                                                                                                                  const tcu::IVec2&      localSize,
865                                                                                                                                  const tcu::IVec2&      imageSize);
866
867         tcu::TestStatus                                 iterate                                         (void);
868
869 private:
870         const tcu::IVec2                                m_localSize;
871         const tcu::IVec2                                m_imageSize;
872 };
873
874 CopySSBOToImageTest::CopySSBOToImageTest (tcu::TestContext&             testCtx,
875                                                                                   const std::string&    name,
876                                                                                   const std::string&    description,
877                                                                                   const tcu::IVec2&             localSize,
878                                                                                   const tcu::IVec2&             imageSize)
879         : TestCase              (testCtx, name, description)
880         , m_localSize   (localSize)
881         , m_imageSize   (imageSize)
882 {
883         DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
884         DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
885 }
886
887 void CopySSBOToImageTest::initPrograms (SourceCollections& sourceCollections) const
888 {
889         std::ostringstream src;
890         src << "#version 310 es\n"
891                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
892                 << "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_dstImg;\n"
893                 << "layout(binding = 0) readonly buffer Input {\n"
894                 << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
895                 << "} sb_in;\n\n"
896                 << "void main (void) {\n"
897                 << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
898                 << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
899                 << "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
900                 << "}\n";
901
902         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
903 }
904
905 TestInstance* CopySSBOToImageTest::createInstance (Context& context) const
906 {
907         return new CopySSBOToImageTestInstance(context, m_localSize, m_imageSize);
908 }
909
910 CopySSBOToImageTestInstance::CopySSBOToImageTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
911         : TestInstance  (context)
912         , m_localSize   (localSize)
913         , m_imageSize   (imageSize)
914 {
915 }
916
917 tcu::TestStatus CopySSBOToImageTestInstance::iterate (void)
918 {
919         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
920         const VkDevice                  device                          = m_context.getDevice();
921         const VkQueue                   queue                           = m_context.getUniversalQueue();
922         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
923         Allocator&                              allocator                       = m_context.getDefaultAllocator();
924
925         // Create an image
926
927         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
928         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
929
930         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
931         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
932
933         // Create an input buffer (data to be read in the shader)
934
935         const deUint32 imageArea = multiplyComponents(m_imageSize);
936         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
937
938         const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
939
940         // Populate the buffer with test data
941         {
942                 de::Random rnd(0x77238ac2);
943                 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
944                 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
945                 for (deUint32 i = 0; i < imageArea; ++i)
946                         *bufferPtr++ = rnd.getUint32();
947
948                 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
949         }
950
951         // Create a buffer to store shader output (copied from image data)
952
953         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
954
955         // Create descriptor set
956
957         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
958                 DescriptorSetLayoutBuilder()
959                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
960                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
961                 .build(vk, device));
962
963         const Unique<VkDescriptorPool> descriptorPool(
964                 DescriptorPoolBuilder()
965                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
966                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
967                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
968
969         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
970
971         // Set the bindings
972
973         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
974         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
975
976         DescriptorSetUpdateBuilder()
977                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
978                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
979                 .update(vk, device);
980
981         // Perform the computation
982         {
983                 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
984                 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
985                 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
986
987                 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
988
989                 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
990                         0u, 0u,
991                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
992                         *image, subresourceRange);
993
994                 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
995                         VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
996                         VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
997                         *image, subresourceRange);
998
999                 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1000
1001                 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
1002                 const tcu::IVec2 workSize = m_imageSize / m_localSize;
1003
1004                 // Prepare the command buffer
1005
1006                 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1007                 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1008
1009                 // Start recording commands
1010
1011                 beginCommandBuffer(vk, *cmdBuffer);
1012
1013                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1014                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1015
1016                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
1017                 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
1018
1019                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
1020                 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &copyParams);
1021                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1022
1023                 endCommandBuffer(vk, *cmdBuffer);
1024
1025                 // Wait for completion
1026
1027                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1028         }
1029
1030         // Validate the results
1031
1032         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1033         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1034
1035         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1036         const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
1037
1038         for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
1039         {
1040                 const deUint32 res = *(bufferPtr + ndx);
1041                 const deUint32 ref = *(refBufferPtr + ndx);
1042
1043                 if (res != ref)
1044                 {
1045                         std::ostringstream msg;
1046                         msg << "Comparison failed for pixel " << ndx;
1047                         return tcu::TestStatus::fail(msg.str());
1048                 }
1049         }
1050         return tcu::TestStatus::pass("Compute succeeded");
1051 }
1052
1053 class BufferToBufferInvertTest : public vkt::TestCase
1054 {
1055 public:
1056         void                                                            initPrograms                            (SourceCollections&     sourceCollections) const;
1057         TestInstance*                                           createInstance                          (Context&                       context) const;
1058
1059         static BufferToBufferInvertTest*        UBOToSSBOInvertCase                     (tcu::TestContext&      testCtx,
1060                                                                                                                                          const std::string& name,
1061                                                                                                                                          const std::string& description,
1062                                                                                                                                          const deUint32         numValues,
1063                                                                                                                                          const tcu::IVec3&      localSize,
1064                                                                                                                                          const tcu::IVec3&      workSize);
1065
1066         static BufferToBufferInvertTest*        CopyInvertSSBOCase                      (tcu::TestContext&      testCtx,
1067                                                                                                                                          const std::string& name,
1068                                                                                                                                          const std::string& description,
1069                                                                                                                                          const deUint32         numValues,
1070                                                                                                                                          const tcu::IVec3&      localSize,
1071                                                                                                                                          const tcu::IVec3&      workSize);
1072
1073 private:
1074                                                                                 BufferToBufferInvertTest        (tcu::TestContext&      testCtx,
1075                                                                                                                                          const std::string& name,
1076                                                                                                                                          const std::string& description,
1077                                                                                                                                          const deUint32         numValues,
1078                                                                                                                                          const tcu::IVec3&      localSize,
1079                                                                                                                                          const tcu::IVec3&      workSize,
1080                                                                                                                                          const BufferType       bufferType);
1081
1082         const BufferType                                        m_bufferType;
1083         const deUint32                                          m_numValues;
1084         const tcu::IVec3                                        m_localSize;
1085         const tcu::IVec3                                        m_workSize;
1086 };
1087
1088 class BufferToBufferInvertTestInstance : public vkt::TestInstance
1089 {
1090 public:
1091                                                                         BufferToBufferInvertTestInstance        (Context&                       context,
1092                                                                                                                                                  const deUint32         numValues,
1093                                                                                                                                                  const tcu::IVec3&      localSize,
1094                                                                                                                                                  const tcu::IVec3&      workSize,
1095                                                                                                                                                  const BufferType       bufferType);
1096
1097         tcu::TestStatus                                 iterate                                                         (void);
1098
1099 private:
1100         const BufferType                                m_bufferType;
1101         const deUint32                                  m_numValues;
1102         const tcu::IVec3                                m_localSize;
1103         const tcu::IVec3                                m_workSize;
1104 };
1105
1106 BufferToBufferInvertTest::BufferToBufferInvertTest (tcu::TestContext&   testCtx,
1107                                                                                                         const std::string&      name,
1108                                                                                                         const std::string&      description,
1109                                                                                                         const deUint32          numValues,
1110                                                                                                         const tcu::IVec3&       localSize,
1111                                                                                                         const tcu::IVec3&       workSize,
1112                                                                                                         const BufferType        bufferType)
1113         : TestCase              (testCtx, name, description)
1114         , m_bufferType  (bufferType)
1115         , m_numValues   (numValues)
1116         , m_localSize   (localSize)
1117         , m_workSize    (workSize)
1118 {
1119         DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1120         DE_ASSERT(m_bufferType == BUFFER_TYPE_UNIFORM || m_bufferType == BUFFER_TYPE_SSBO);
1121 }
1122
1123 BufferToBufferInvertTest* BufferToBufferInvertTest::UBOToSSBOInvertCase (tcu::TestContext&      testCtx,
1124                                                                                                                                                  const std::string&     name,
1125                                                                                                                                                  const std::string&     description,
1126                                                                                                                                                  const deUint32         numValues,
1127                                                                                                                                                  const tcu::IVec3&      localSize,
1128                                                                                                                                                  const tcu::IVec3&      workSize)
1129 {
1130         return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_UNIFORM);
1131 }
1132
1133 BufferToBufferInvertTest* BufferToBufferInvertTest::CopyInvertSSBOCase (tcu::TestContext&       testCtx,
1134                                                                                                                                                 const std::string&      name,
1135                                                                                                                                                 const std::string&      description,
1136                                                                                                                                                 const deUint32          numValues,
1137                                                                                                                                                 const tcu::IVec3&       localSize,
1138                                                                                                                                                 const tcu::IVec3&       workSize)
1139 {
1140         return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_SSBO);
1141 }
1142
1143 void BufferToBufferInvertTest::initPrograms (SourceCollections& sourceCollections) const
1144 {
1145         std::ostringstream src;
1146         if (m_bufferType == BUFFER_TYPE_UNIFORM)
1147         {
1148                 src << "#version 310 es\n"
1149                         << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1150                         << "layout(binding = 0) readonly uniform Input {\n"
1151                         << "    uint values[" << m_numValues << "];\n"
1152                         << "} ub_in;\n"
1153                         << "layout(binding = 1, std140) writeonly buffer Output {\n"
1154                         << "    uint values[" << m_numValues << "];\n"
1155                         << "} sb_out;\n"
1156                         << "void main (void) {\n"
1157                         << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1158                         << "    uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
1159                         << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1160                         << "    uint offset          = numValuesPerInv*groupNdx;\n"
1161                         << "\n"
1162                         << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1163                         << "        sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
1164                         << "}\n";
1165         }
1166         else if (m_bufferType == BUFFER_TYPE_SSBO)
1167         {
1168                 src << "#version 310 es\n"
1169                         << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1170                         << "layout(binding = 0, std140) readonly buffer Input {\n"
1171                         << "    uint values[" << m_numValues << "];\n"
1172                         << "} sb_in;\n"
1173                         << "layout (binding = 1, std140) writeonly buffer Output {\n"
1174                         << "    uint values[" << m_numValues << "];\n"
1175                         << "} sb_out;\n"
1176                         << "void main (void) {\n"
1177                         << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1178                         << "    uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
1179                         << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1180                         << "    uint offset          = numValuesPerInv*groupNdx;\n"
1181                         << "\n"
1182                         << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1183                         << "        sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
1184                         << "}\n";
1185         }
1186
1187         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1188 }
1189
1190 TestInstance* BufferToBufferInvertTest::createInstance (Context& context) const
1191 {
1192         return new BufferToBufferInvertTestInstance(context, m_numValues, m_localSize, m_workSize, m_bufferType);
1193 }
1194
1195 BufferToBufferInvertTestInstance::BufferToBufferInvertTestInstance (Context&                    context,
1196                                                                                                                                         const deUint32          numValues,
1197                                                                                                                                         const tcu::IVec3&       localSize,
1198                                                                                                                                         const tcu::IVec3&       workSize,
1199                                                                                                                                         const BufferType        bufferType)
1200         : TestInstance  (context)
1201         , m_bufferType  (bufferType)
1202         , m_numValues   (numValues)
1203         , m_localSize   (localSize)
1204         , m_workSize    (workSize)
1205 {
1206 }
1207
1208 tcu::TestStatus BufferToBufferInvertTestInstance::iterate (void)
1209 {
1210         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1211         const VkDevice                  device                          = m_context.getDevice();
1212         const VkQueue                   queue                           = m_context.getUniversalQueue();
1213         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1214         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1215
1216         // Customize the test based on buffer type
1217
1218         const VkBufferUsageFlags inputBufferUsageFlags          = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1219         const VkDescriptorType inputBufferDescriptorType        = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
1220         const deUint32 randomSeed                                                       = (m_bufferType == BUFFER_TYPE_UNIFORM ? 0x111223f : 0x124fef);
1221
1222         // Create an input buffer
1223
1224         const VkDeviceSize bufferSizeBytes = sizeof(tcu::UVec4) * m_numValues;
1225         const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, inputBufferUsageFlags), MemoryRequirement::HostVisible);
1226
1227         // Fill the input buffer with data
1228         {
1229                 de::Random rnd(randomSeed);
1230                 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
1231                 tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(inputBufferAllocation.getHostPtr());
1232                 for (deUint32 i = 0; i < m_numValues; ++i)
1233                         bufferPtr[i].x() = rnd.getUint32();
1234
1235                 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
1236         }
1237
1238         // Create an output buffer
1239
1240         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1241
1242         // Create descriptor set
1243
1244         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1245                 DescriptorSetLayoutBuilder()
1246                 .addSingleBinding(inputBufferDescriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1247                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1248                 .build(vk, device));
1249
1250         const Unique<VkDescriptorPool> descriptorPool(
1251                 DescriptorPoolBuilder()
1252                 .addType(inputBufferDescriptorType)
1253                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1254                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1255
1256         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1257
1258         const VkDescriptorBufferInfo inputBufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
1259         const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
1260         DescriptorSetUpdateBuilder()
1261                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), inputBufferDescriptorType, &inputBufferDescriptorInfo)
1262                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1263                 .update(vk, device);
1264
1265         // Perform the computation
1266
1267         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1268         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1269         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1270
1271         const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
1272
1273         const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1274
1275         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1276         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1277
1278         // Start recording commands
1279
1280         beginCommandBuffer(vk, *cmdBuffer);
1281
1282         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1283         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1284
1285         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1286         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1287         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1288
1289         endCommandBuffer(vk, *cmdBuffer);
1290
1291         // Wait for completion
1292
1293         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1294
1295         // Validate the results
1296
1297         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1298         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1299
1300         const tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(outputBufferAllocation.getHostPtr());
1301         const tcu::UVec4* refBufferPtr = static_cast<tcu::UVec4*>(inputBuffer.getAllocation().getHostPtr());
1302
1303         for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1304         {
1305                 const deUint32 res = bufferPtr[ndx].x();
1306                 const deUint32 ref = ~refBufferPtr[ndx].x();
1307
1308                 if (res != ref)
1309                 {
1310                         std::ostringstream msg;
1311                         msg << "Comparison failed for Output.values[" << ndx << "]";
1312                         return tcu::TestStatus::fail(msg.str());
1313                 }
1314         }
1315         return tcu::TestStatus::pass("Compute succeeded");
1316 }
1317
1318 class InvertSSBOInPlaceTest : public vkt::TestCase
1319 {
1320 public:
1321                                                 InvertSSBOInPlaceTest   (tcu::TestContext&      testCtx,
1322                                                                                                  const std::string&     name,
1323                                                                                                  const std::string&     description,
1324                                                                                                  const deUint32         numValues,
1325                                                                                                  const bool                     sized,
1326                                                                                                  const tcu::IVec3&      localSize,
1327                                                                                                  const tcu::IVec3&      workSize);
1328
1329
1330         void                            initPrograms                    (SourceCollections& sourceCollections) const;
1331         TestInstance*           createInstance                  (Context&                       context) const;
1332
1333 private:
1334         const deUint32          m_numValues;
1335         const bool                      m_sized;
1336         const tcu::IVec3        m_localSize;
1337         const tcu::IVec3        m_workSize;
1338 };
1339
1340 class InvertSSBOInPlaceTestInstance : public vkt::TestInstance
1341 {
1342 public:
1343                                                                         InvertSSBOInPlaceTestInstance   (Context&                       context,
1344                                                                                                                                          const deUint32         numValues,
1345                                                                                                                                          const tcu::IVec3&      localSize,
1346                                                                                                                                          const tcu::IVec3&      workSize);
1347
1348         tcu::TestStatus                                 iterate                                                 (void);
1349
1350 private:
1351         const deUint32                                  m_numValues;
1352         const tcu::IVec3                                m_localSize;
1353         const tcu::IVec3                                m_workSize;
1354 };
1355
1356 InvertSSBOInPlaceTest::InvertSSBOInPlaceTest (tcu::TestContext&         testCtx,
1357                                                                                           const std::string&    name,
1358                                                                                           const std::string&    description,
1359                                                                                           const deUint32                numValues,
1360                                                                                           const bool                    sized,
1361                                                                                           const tcu::IVec3&             localSize,
1362                                                                                           const tcu::IVec3&             workSize)
1363         : TestCase              (testCtx, name, description)
1364         , m_numValues   (numValues)
1365         , m_sized               (sized)
1366         , m_localSize   (localSize)
1367         , m_workSize    (workSize)
1368 {
1369         DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1370 }
1371
1372 void InvertSSBOInPlaceTest::initPrograms (SourceCollections& sourceCollections) const
1373 {
1374         std::ostringstream src;
1375         src << "#version 310 es\n"
1376                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1377                 << "layout(binding = 0) buffer InOut {\n"
1378                 << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1379                 << "} sb_inout;\n"
1380                 << "void main (void) {\n"
1381                 << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1382                 << "    uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
1383                 << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1384                 << "    uint offset          = numValuesPerInv*groupNdx;\n"
1385                 << "\n"
1386                 << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1387                 << "        sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
1388                 << "}\n";
1389
1390         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1391 }
1392
1393 TestInstance* InvertSSBOInPlaceTest::createInstance (Context& context) const
1394 {
1395         return new InvertSSBOInPlaceTestInstance(context, m_numValues, m_localSize, m_workSize);
1396 }
1397
1398 InvertSSBOInPlaceTestInstance::InvertSSBOInPlaceTestInstance (Context&                  context,
1399                                                                                                                           const deUint32        numValues,
1400                                                                                                                           const tcu::IVec3&     localSize,
1401                                                                                                                           const tcu::IVec3&     workSize)
1402         : TestInstance  (context)
1403         , m_numValues   (numValues)
1404         , m_localSize   (localSize)
1405         , m_workSize    (workSize)
1406 {
1407 }
1408
1409 tcu::TestStatus InvertSSBOInPlaceTestInstance::iterate (void)
1410 {
1411         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1412         const VkDevice                  device                          = m_context.getDevice();
1413         const VkQueue                   queue                           = m_context.getUniversalQueue();
1414         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1415         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1416
1417         // Create an input/output buffer
1418
1419         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1420         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1421
1422         // Fill the buffer with data
1423
1424         typedef std::vector<deUint32> data_vector_t;
1425         data_vector_t inputData(m_numValues);
1426
1427         {
1428                 de::Random rnd(0x82ce7f);
1429                 const Allocation& bufferAllocation = buffer.getAllocation();
1430                 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1431                 for (deUint32 i = 0; i < m_numValues; ++i)
1432                         inputData[i] = *bufferPtr++ = rnd.getUint32();
1433
1434                 flushMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1435         }
1436
1437         // Create descriptor set
1438
1439         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1440                 DescriptorSetLayoutBuilder()
1441                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1442                 .build(vk, device));
1443
1444         const Unique<VkDescriptorPool> descriptorPool(
1445                 DescriptorPoolBuilder()
1446                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1447                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1448
1449         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1450
1451         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
1452         DescriptorSetUpdateBuilder()
1453                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
1454                 .update(vk, device);
1455
1456         // Perform the computation
1457
1458         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1459         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1460         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1461
1462         const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1463
1464         const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1465
1466         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1467         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1468
1469         // Start recording commands
1470
1471         beginCommandBuffer(vk, *cmdBuffer);
1472
1473         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1474         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1475
1476         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1477         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1478         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1479
1480         endCommandBuffer(vk, *cmdBuffer);
1481
1482         // Wait for completion
1483
1484         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1485
1486         // Validate the results
1487
1488         const Allocation& bufferAllocation = buffer.getAllocation();
1489         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1490
1491         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1492
1493         for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1494         {
1495                 const deUint32 res = bufferPtr[ndx];
1496                 const deUint32 ref = ~inputData[ndx];
1497
1498                 if (res != ref)
1499                 {
1500                         std::ostringstream msg;
1501                         msg << "Comparison failed for InOut.values[" << ndx << "]";
1502                         return tcu::TestStatus::fail(msg.str());
1503                 }
1504         }
1505         return tcu::TestStatus::pass("Compute succeeded");
1506 }
1507
1508 class WriteToMultipleSSBOTest : public vkt::TestCase
1509 {
1510 public:
1511                                                 WriteToMultipleSSBOTest (tcu::TestContext&      testCtx,
1512                                                                                                  const std::string&     name,
1513                                                                                                  const std::string&     description,
1514                                                                                                  const deUint32         numValues,
1515                                                                                                  const bool                     sized,
1516                                                                                                  const tcu::IVec3&      localSize,
1517                                                                                                  const tcu::IVec3&      workSize);
1518
1519         void                            initPrograms                    (SourceCollections& sourceCollections) const;
1520         TestInstance*           createInstance                  (Context&                       context) const;
1521
1522 private:
1523         const deUint32          m_numValues;
1524         const bool                      m_sized;
1525         const tcu::IVec3        m_localSize;
1526         const tcu::IVec3        m_workSize;
1527 };
1528
1529 class WriteToMultipleSSBOTestInstance : public vkt::TestInstance
1530 {
1531 public:
1532                                                                         WriteToMultipleSSBOTestInstance (Context&                       context,
1533                                                                                                                                          const deUint32         numValues,
1534                                                                                                                                          const tcu::IVec3&      localSize,
1535                                                                                                                                          const tcu::IVec3&      workSize);
1536
1537         tcu::TestStatus                                 iterate                                                 (void);
1538
1539 private:
1540         const deUint32                                  m_numValues;
1541         const tcu::IVec3                                m_localSize;
1542         const tcu::IVec3                                m_workSize;
1543 };
1544
1545 WriteToMultipleSSBOTest::WriteToMultipleSSBOTest (tcu::TestContext&             testCtx,
1546                                                                                                   const std::string&    name,
1547                                                                                                   const std::string&    description,
1548                                                                                                   const deUint32                numValues,
1549                                                                                                   const bool                    sized,
1550                                                                                                   const tcu::IVec3&             localSize,
1551                                                                                                   const tcu::IVec3&             workSize)
1552         : TestCase              (testCtx, name, description)
1553         , m_numValues   (numValues)
1554         , m_sized               (sized)
1555         , m_localSize   (localSize)
1556         , m_workSize    (workSize)
1557 {
1558         DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1559 }
1560
1561 void WriteToMultipleSSBOTest::initPrograms (SourceCollections& sourceCollections) const
1562 {
1563         std::ostringstream src;
1564         src << "#version 310 es\n"
1565                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1566                 << "layout(binding = 0) writeonly buffer Out0 {\n"
1567                 << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1568                 << "} sb_out0;\n"
1569                 << "layout(binding = 1) writeonly buffer Out1 {\n"
1570                 << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1571                 << "} sb_out1;\n"
1572                 << "void main (void) {\n"
1573                 << "    uvec3 size      = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1574                 << "    uint groupNdx   = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1575                 << "\n"
1576                 << "    {\n"
1577                 << "        uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
1578                 << "        uint offset          = numValuesPerInv*groupNdx;\n"
1579                 << "\n"
1580                 << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1581                 << "            sb_out0.values[offset + ndx] = offset + ndx;\n"
1582                 << "    }\n"
1583                 << "    {\n"
1584                 << "        uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
1585                 << "        uint offset          = numValuesPerInv*groupNdx;\n"
1586                 << "\n"
1587                 << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1588                 << "            sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
1589                 << "    }\n"
1590                 << "}\n";
1591
1592         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1593 }
1594
1595 TestInstance* WriteToMultipleSSBOTest::createInstance (Context& context) const
1596 {
1597         return new WriteToMultipleSSBOTestInstance(context, m_numValues, m_localSize, m_workSize);
1598 }
1599
1600 WriteToMultipleSSBOTestInstance::WriteToMultipleSSBOTestInstance (Context&                      context,
1601                                                                                                                                   const deUint32        numValues,
1602                                                                                                                                   const tcu::IVec3&     localSize,
1603                                                                                                                                   const tcu::IVec3&     workSize)
1604         : TestInstance  (context)
1605         , m_numValues   (numValues)
1606         , m_localSize   (localSize)
1607         , m_workSize    (workSize)
1608 {
1609 }
1610
1611 tcu::TestStatus WriteToMultipleSSBOTestInstance::iterate (void)
1612 {
1613         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1614         const VkDevice                  device                          = m_context.getDevice();
1615         const VkQueue                   queue                           = m_context.getUniversalQueue();
1616         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1617         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1618
1619         // Create two output buffers
1620
1621         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1622         const Buffer buffer0(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1623         const Buffer buffer1(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1624
1625         // Create descriptor set
1626
1627         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1628                 DescriptorSetLayoutBuilder()
1629                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1630                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1631                 .build(vk, device));
1632
1633         const Unique<VkDescriptorPool> descriptorPool(
1634                 DescriptorPoolBuilder()
1635                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1636                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1637
1638         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1639
1640         const VkDescriptorBufferInfo buffer0DescriptorInfo = makeDescriptorBufferInfo(*buffer0, 0ull, bufferSizeBytes);
1641         const VkDescriptorBufferInfo buffer1DescriptorInfo = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
1642         DescriptorSetUpdateBuilder()
1643                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer0DescriptorInfo)
1644                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer1DescriptorInfo)
1645                 .update(vk, device);
1646
1647         // Perform the computation
1648
1649         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1650         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1651         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1652
1653         const VkBufferMemoryBarrier shaderWriteBarriers[] =
1654         {
1655                 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer0, 0ull, bufferSizeBytes),
1656                 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes)
1657         };
1658
1659         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1660         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1661
1662         // Start recording commands
1663
1664         beginCommandBuffer(vk, *cmdBuffer);
1665
1666         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1667         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1668
1669         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1670         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, DE_LENGTH_OF_ARRAY(shaderWriteBarriers), shaderWriteBarriers, 0, (const VkImageMemoryBarrier*)DE_NULL);
1671
1672         endCommandBuffer(vk, *cmdBuffer);
1673
1674         // Wait for completion
1675
1676         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1677
1678         // Validate the results
1679         {
1680                 const Allocation& buffer0Allocation = buffer0.getAllocation();
1681                 invalidateMappedMemoryRange(vk, device, buffer0Allocation.getMemory(), buffer0Allocation.getOffset(), bufferSizeBytes);
1682                 const deUint32* buffer0Ptr = static_cast<deUint32*>(buffer0Allocation.getHostPtr());
1683
1684                 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1685                 {
1686                         const deUint32 res = buffer0Ptr[ndx];
1687                         const deUint32 ref = ndx;
1688
1689                         if (res != ref)
1690                         {
1691                                 std::ostringstream msg;
1692                                 msg << "Comparison failed for Out0.values[" << ndx << "] res=" << res << " ref=" << ref;
1693                                 return tcu::TestStatus::fail(msg.str());
1694                         }
1695                 }
1696         }
1697         {
1698                 const Allocation& buffer1Allocation = buffer1.getAllocation();
1699                 invalidateMappedMemoryRange(vk, device, buffer1Allocation.getMemory(), buffer1Allocation.getOffset(), bufferSizeBytes);
1700                 const deUint32* buffer1Ptr = static_cast<deUint32*>(buffer1Allocation.getHostPtr());
1701
1702                 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1703                 {
1704                         const deUint32 res = buffer1Ptr[ndx];
1705                         const deUint32 ref = m_numValues - ndx;
1706
1707                         if (res != ref)
1708                         {
1709                                 std::ostringstream msg;
1710                                 msg << "Comparison failed for Out1.values[" << ndx << "] res=" << res << " ref=" << ref;
1711                                 return tcu::TestStatus::fail(msg.str());
1712                         }
1713                 }
1714         }
1715         return tcu::TestStatus::pass("Compute succeeded");
1716 }
1717
1718 class SSBOBarrierTest : public vkt::TestCase
1719 {
1720 public:
1721                                                 SSBOBarrierTest         (tcu::TestContext&      testCtx,
1722                                                                                          const std::string&     name,
1723                                                                                          const std::string&     description,
1724                                                                                          const tcu::IVec3&      workSize);
1725
1726         void                            initPrograms            (SourceCollections& sourceCollections) const;
1727         TestInstance*           createInstance          (Context&                       context) const;
1728
1729 private:
1730         const tcu::IVec3        m_workSize;
1731 };
1732
1733 class SSBOBarrierTestInstance : public vkt::TestInstance
1734 {
1735 public:
1736                                                                         SSBOBarrierTestInstance         (Context&                       context,
1737                                                                                                                                  const tcu::IVec3&      workSize);
1738
1739         tcu::TestStatus                                 iterate                                         (void);
1740
1741 private:
1742         const tcu::IVec3                                m_workSize;
1743 };
1744
1745 SSBOBarrierTest::SSBOBarrierTest (tcu::TestContext&             testCtx,
1746                                                                   const std::string&    name,
1747                                                                   const std::string&    description,
1748                                                                   const tcu::IVec3&             workSize)
1749         : TestCase              (testCtx, name, description)
1750         , m_workSize    (workSize)
1751 {
1752 }
1753
1754 void SSBOBarrierTest::initPrograms (SourceCollections& sourceCollections) const
1755 {
1756         sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
1757                 "#version 310 es\n"
1758                 "layout (local_size_x = 1) in;\n"
1759                 "layout(binding = 2) readonly uniform Constants {\n"
1760                 "    uint u_baseVal;\n"
1761                 "};\n"
1762                 "layout(binding = 1) writeonly buffer Output {\n"
1763                 "    uint values[];\n"
1764                 "};\n"
1765                 "void main (void) {\n"
1766                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1767                 "    values[offset] = u_baseVal + offset;\n"
1768                 "}\n");
1769
1770         sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
1771                 "#version 310 es\n"
1772                 "layout (local_size_x = 1) in;\n"
1773                 "layout(binding = 1) readonly buffer Input {\n"
1774                 "    uint values[];\n"
1775                 "};\n"
1776                 "layout(binding = 0) coherent buffer Output {\n"
1777                 "    uint sum;\n"
1778                 "};\n"
1779                 "void main (void) {\n"
1780                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1781                 "    uint value  = values[offset];\n"
1782                 "    atomicAdd(sum, value);\n"
1783                 "}\n");
1784 }
1785
1786 TestInstance* SSBOBarrierTest::createInstance (Context& context) const
1787 {
1788         return new SSBOBarrierTestInstance(context, m_workSize);
1789 }
1790
1791 SSBOBarrierTestInstance::SSBOBarrierTestInstance (Context& context, const tcu::IVec3& workSize)
1792         : TestInstance  (context)
1793         , m_workSize    (workSize)
1794 {
1795 }
1796
1797 tcu::TestStatus SSBOBarrierTestInstance::iterate (void)
1798 {
1799         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1800         const VkDevice                  device                          = m_context.getDevice();
1801         const VkQueue                   queue                           = m_context.getUniversalQueue();
1802         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1803         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1804
1805         // Create a work buffer used by both shaders
1806
1807         const int workGroupCount = multiplyComponents(m_workSize);
1808         const VkDeviceSize workBufferSizeBytes = sizeof(deUint32) * workGroupCount;
1809         const Buffer workBuffer(vk, device, allocator, makeBufferCreateInfo(workBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::Any);
1810
1811         // Create an output buffer
1812
1813         const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
1814         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1815
1816         // Initialize atomic counter value to zero
1817         {
1818                 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1819                 deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1820                 *outputBufferPtr = 0;
1821                 flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1822         }
1823
1824         // Create a uniform buffer (to pass uniform constants)
1825
1826         const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
1827         const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
1828
1829         // Set the constants in the uniform buffer
1830
1831         const deUint32  baseValue = 127;
1832         {
1833                 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
1834                 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
1835                 uniformBufferPtr[0] = baseValue;
1836
1837                 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
1838         }
1839
1840         // Create descriptor set
1841
1842         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1843                 DescriptorSetLayoutBuilder()
1844                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1845                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1846                 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1847                 .build(vk, device));
1848
1849         const Unique<VkDescriptorPool> descriptorPool(
1850                 DescriptorPoolBuilder()
1851                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1852                 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1853                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1854
1855         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1856
1857         const VkDescriptorBufferInfo workBufferDescriptorInfo = makeDescriptorBufferInfo(*workBuffer, 0ull, workBufferSizeBytes);
1858         const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
1859         const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
1860         DescriptorSetUpdateBuilder()
1861                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1862                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &workBufferDescriptorInfo)
1863                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
1864                 .update(vk, device);
1865
1866         // Perform the computation
1867
1868         const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
1869         const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
1870
1871         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1872         const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
1873         const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
1874
1875         const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
1876
1877         const VkBufferMemoryBarrier betweenShadersBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *workBuffer, 0ull, workBufferSizeBytes);
1878
1879         const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
1880
1881         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1882         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1883
1884         // Start recording commands
1885
1886         beginCommandBuffer(vk, *cmdBuffer);
1887
1888         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
1889         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1890
1891         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1892
1893         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1894         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &betweenShadersBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1895
1896         // Switch to the second shader program
1897         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
1898
1899         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1900         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1901
1902         endCommandBuffer(vk, *cmdBuffer);
1903
1904         // Wait for completion
1905
1906         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1907
1908         // Validate the results
1909
1910         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1911         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1912
1913         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1914         const deUint32  res = *bufferPtr;
1915         deUint32                ref = 0;
1916
1917         for (int ndx = 0; ndx < workGroupCount; ++ndx)
1918                 ref += baseValue + ndx;
1919
1920         if (res != ref)
1921         {
1922                 std::ostringstream msg;
1923                 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
1924                 return tcu::TestStatus::fail(msg.str());
1925         }
1926         return tcu::TestStatus::pass("Compute succeeded");
1927 }
1928
1929 class ImageAtomicOpTest : public vkt::TestCase
1930 {
1931 public:
1932                                                 ImageAtomicOpTest               (tcu::TestContext&      testCtx,
1933                                                                                                  const std::string& name,
1934                                                                                                  const std::string& description,
1935                                                                                                  const deUint32         localSize,
1936                                                                                                  const tcu::IVec2&      imageSize);
1937
1938         void                            initPrograms                    (SourceCollections& sourceCollections) const;
1939         TestInstance*           createInstance                  (Context&                       context) const;
1940
1941 private:
1942         const deUint32          m_localSize;
1943         const tcu::IVec2        m_imageSize;
1944 };
1945
1946 class ImageAtomicOpTestInstance : public vkt::TestInstance
1947 {
1948 public:
1949                                                                         ImageAtomicOpTestInstance               (Context&                       context,
1950                                                                                                                                          const deUint32         localSize,
1951                                                                                                                                          const tcu::IVec2&      imageSize);
1952
1953         tcu::TestStatus                                 iterate                                                 (void);
1954
1955 private:
1956         const deUint32                                  m_localSize;
1957         const tcu::IVec2                                m_imageSize;
1958 };
1959
1960 ImageAtomicOpTest::ImageAtomicOpTest (tcu::TestContext&         testCtx,
1961                                                                           const std::string&    name,
1962                                                                           const std::string&    description,
1963                                                                           const deUint32                localSize,
1964                                                                           const tcu::IVec2&             imageSize)
1965         : TestCase              (testCtx, name, description)
1966         , m_localSize   (localSize)
1967         , m_imageSize   (imageSize)
1968 {
1969 }
1970
1971 void ImageAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
1972 {
1973         std::ostringstream src;
1974         src << "#version 310 es\n"
1975                 << "#extension GL_OES_shader_image_atomic : require\n"
1976                 << "layout (local_size_x = " << m_localSize << ") in;\n"
1977                 << "layout(binding = 1, r32ui) coherent uniform highp uimage2D u_dstImg;\n"
1978                 << "layout(binding = 0) readonly buffer Input {\n"
1979                 << "    uint values[" << (multiplyComponents(m_imageSize) * m_localSize) << "];\n"
1980                 << "} sb_in;\n\n"
1981                 << "void main (void) {\n"
1982                 << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1983                 << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1984                 << "\n"
1985                 << "    if (gl_LocalInvocationIndex == 0u)\n"
1986                 << "        imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1987                 << "    memoryBarrierImage();\n"
1988                 << "    barrier();\n"
1989                 << "    imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
1990                 << "}\n";
1991
1992         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1993 }
1994
1995 TestInstance* ImageAtomicOpTest::createInstance (Context& context) const
1996 {
1997         return new ImageAtomicOpTestInstance(context, m_localSize, m_imageSize);
1998 }
1999
2000 ImageAtomicOpTestInstance::ImageAtomicOpTestInstance (Context& context, const deUint32 localSize, const tcu::IVec2& imageSize)
2001         : TestInstance  (context)
2002         , m_localSize   (localSize)
2003         , m_imageSize   (imageSize)
2004 {
2005 }
2006
2007 tcu::TestStatus ImageAtomicOpTestInstance::iterate (void)
2008 {
2009         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
2010         const VkDevice                  device                          = m_context.getDevice();
2011         const VkQueue                   queue                           = m_context.getUniversalQueue();
2012         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
2013         Allocator&                              allocator                       = m_context.getDefaultAllocator();
2014
2015         // Create an image
2016
2017         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
2018         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2019
2020         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2021         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2022
2023         // Input buffer
2024
2025         const deUint32 numInputValues = multiplyComponents(m_imageSize) * m_localSize;
2026         const VkDeviceSize inputBufferSizeBytes = sizeof(deUint32) * numInputValues;
2027
2028         const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(inputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
2029
2030         // Populate the input buffer with test data
2031         {
2032                 de::Random rnd(0x77238ac2);
2033                 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
2034                 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
2035                 for (deUint32 i = 0; i < numInputValues; ++i)
2036                         *bufferPtr++ = rnd.getUint32();
2037
2038                 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), inputBufferSizeBytes);
2039         }
2040
2041         // Create a buffer to store shader output (copied from image data)
2042
2043         const deUint32 imageArea = multiplyComponents(m_imageSize);
2044         const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32) * imageArea;
2045         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
2046
2047         // Create descriptor set
2048
2049         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2050                 DescriptorSetLayoutBuilder()
2051                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2052                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2053                 .build(vk, device));
2054
2055         const Unique<VkDescriptorPool> descriptorPool(
2056                 DescriptorPoolBuilder()
2057                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2058                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2059                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2060
2061         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2062
2063         // Set the bindings
2064
2065         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2066         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, inputBufferSizeBytes);
2067
2068         DescriptorSetUpdateBuilder()
2069                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2070                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2071                 .update(vk, device);
2072
2073         // Perform the computation
2074         {
2075                 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
2076                 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2077                 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2078
2079                 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, inputBufferSizeBytes);
2080
2081                 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
2082                         VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
2083                         VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2084                         *image, subresourceRange);
2085
2086                 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2087
2088                 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
2089
2090                 // Prepare the command buffer
2091
2092                 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2093                 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2094
2095                 // Start recording commands
2096
2097                 beginCommandBuffer(vk, *cmdBuffer);
2098
2099                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2100                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2101
2102                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2103                 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2104
2105                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
2106                 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &copyParams);
2107                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2108
2109                 endCommandBuffer(vk, *cmdBuffer);
2110
2111                 // Wait for completion
2112
2113                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2114         }
2115
2116         // Validate the results
2117
2118         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2119         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2120
2121         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2122         const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
2123
2124         for (deUint32 pixelNdx = 0; pixelNdx < imageArea; ++pixelNdx)
2125         {
2126                 const deUint32  res = bufferPtr[pixelNdx];
2127                 deUint32                ref = 0;
2128
2129                 for (deUint32 offs = 0; offs < m_localSize; ++offs)
2130                         ref += refBufferPtr[pixelNdx * m_localSize + offs];
2131
2132                 if (res != ref)
2133                 {
2134                         std::ostringstream msg;
2135                         msg << "Comparison failed for pixel " << pixelNdx;
2136                         return tcu::TestStatus::fail(msg.str());
2137                 }
2138         }
2139         return tcu::TestStatus::pass("Compute succeeded");
2140 }
2141
2142 class ImageBarrierTest : public vkt::TestCase
2143 {
2144 public:
2145                                                 ImageBarrierTest        (tcu::TestContext&      testCtx,
2146                                                                                         const std::string&      name,
2147                                                                                         const std::string&      description,
2148                                                                                         const tcu::IVec2&       imageSize);
2149
2150         void                            initPrograms            (SourceCollections& sourceCollections) const;
2151         TestInstance*           createInstance          (Context&                       context) const;
2152
2153 private:
2154         const tcu::IVec2        m_imageSize;
2155 };
2156
2157 class ImageBarrierTestInstance : public vkt::TestInstance
2158 {
2159 public:
2160                                                                         ImageBarrierTestInstance        (Context&                       context,
2161                                                                                                                                  const tcu::IVec2&      imageSize);
2162
2163         tcu::TestStatus                                 iterate                                         (void);
2164
2165 private:
2166         const tcu::IVec2                                m_imageSize;
2167 };
2168
2169 ImageBarrierTest::ImageBarrierTest (tcu::TestContext&   testCtx,
2170                                                                         const std::string&      name,
2171                                                                         const std::string&      description,
2172                                                                         const tcu::IVec2&       imageSize)
2173         : TestCase              (testCtx, name, description)
2174         , m_imageSize   (imageSize)
2175 {
2176 }
2177
2178 void ImageBarrierTest::initPrograms (SourceCollections& sourceCollections) const
2179 {
2180         sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
2181                 "#version 310 es\n"
2182                 "layout (local_size_x = 1) in;\n"
2183                 "layout(binding = 2) readonly uniform Constants {\n"
2184                 "    uint u_baseVal;\n"
2185                 "};\n"
2186                 "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_img;\n"
2187                 "void main (void) {\n"
2188                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
2189                 "    imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset + u_baseVal, 0, 0, 0));\n"
2190                 "}\n");
2191
2192         sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
2193                 "#version 310 es\n"
2194                 "layout (local_size_x = 1) in;\n"
2195                 "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_img;\n"
2196                 "layout(binding = 0) coherent buffer Output {\n"
2197                 "    uint sum;\n"
2198                 "};\n"
2199                 "void main (void) {\n"
2200                 "    uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
2201                 "    atomicAdd(sum, value);\n"
2202                 "}\n");
2203 }
2204
2205 TestInstance* ImageBarrierTest::createInstance (Context& context) const
2206 {
2207         return new ImageBarrierTestInstance(context, m_imageSize);
2208 }
2209
2210 ImageBarrierTestInstance::ImageBarrierTestInstance (Context& context, const tcu::IVec2& imageSize)
2211         : TestInstance  (context)
2212         , m_imageSize   (imageSize)
2213 {
2214 }
2215
2216 tcu::TestStatus ImageBarrierTestInstance::iterate (void)
2217 {
2218         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
2219         const VkDevice                  device                          = m_context.getDevice();
2220         const VkQueue                   queue                           = m_context.getUniversalQueue();
2221         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
2222         Allocator&                              allocator                       = m_context.getDefaultAllocator();
2223
2224         // Create an image used by both shaders
2225
2226         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_STORAGE_BIT);
2227         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2228
2229         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2230         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2231
2232         // Create an output buffer
2233
2234         const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
2235         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2236
2237         // Initialize atomic counter value to zero
2238         {
2239                 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2240                 deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2241                 *outputBufferPtr = 0;
2242                 flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2243         }
2244
2245         // Create a uniform buffer (to pass uniform constants)
2246
2247         const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
2248         const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
2249
2250         // Set the constants in the uniform buffer
2251
2252         const deUint32  baseValue = 127;
2253         {
2254                 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
2255                 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
2256                 uniformBufferPtr[0] = baseValue;
2257
2258                 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
2259         }
2260
2261         // Create descriptor set
2262
2263         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2264                 DescriptorSetLayoutBuilder()
2265                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2266                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2267                 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2268                 .build(vk, device));
2269
2270         const Unique<VkDescriptorPool> descriptorPool(
2271                 DescriptorPoolBuilder()
2272                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2273                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2274                 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2275                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2276
2277         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2278
2279         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2280         const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
2281         const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2282         DescriptorSetUpdateBuilder()
2283                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
2284                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2285                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2286                 .update(vk, device);
2287
2288         // Perform the computation
2289
2290         const Unique<VkShaderModule>    shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
2291         const Unique<VkShaderModule>    shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
2292
2293         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2294         const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
2295         const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
2296
2297         const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
2298
2299         const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2300                 0u, 0u,
2301                 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
2302                 *image, subresourceRange);
2303
2304         const VkImageMemoryBarrier imageBarrierBetweenShaders = makeImageMemoryBarrier(
2305                 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
2306                 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2307                 *image, subresourceRange);
2308
2309         const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2310
2311         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2312         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2313
2314         // Start recording commands
2315
2316         beginCommandBuffer(vk, *cmdBuffer);
2317
2318         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
2319         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2320
2321         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 1, &imageLayoutBarrier);
2322
2323         vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2324         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imageBarrierBetweenShaders);
2325
2326         // Switch to the second shader program
2327         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
2328
2329         vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2330         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2331
2332         endCommandBuffer(vk, *cmdBuffer);
2333
2334         // Wait for completion
2335
2336         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2337
2338         // Validate the results
2339
2340         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2341         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2342
2343         const int               numValues = multiplyComponents(m_imageSize);
2344         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2345         const deUint32  res = *bufferPtr;
2346         deUint32                ref = 0;
2347
2348         for (int ndx = 0; ndx < numValues; ++ndx)
2349                 ref += baseValue + ndx;
2350
2351         if (res != ref)
2352         {
2353                 std::ostringstream msg;
2354                 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
2355                 return tcu::TestStatus::fail(msg.str());
2356         }
2357         return tcu::TestStatus::pass("Compute succeeded");
2358 }
2359
2360 namespace EmptyShaderTest
2361 {
2362
2363 void createProgram (SourceCollections& dst)
2364 {
2365         dst.glslSources.add("comp") << glu::ComputeSource(
2366                 "#version 310 es\n"
2367                 "layout (local_size_x = 1) in;\n"
2368                 "void main (void) {}\n"
2369         );
2370 }
2371
2372 tcu::TestStatus createTest (Context& context)
2373 {
2374         const DeviceInterface&  vk                                      = context.getDeviceInterface();
2375         const VkDevice                  device                          = context.getDevice();
2376         const VkQueue                   queue                           = context.getUniversalQueue();
2377         const deUint32                  queueFamilyIndex        = context.getUniversalQueueFamilyIndex();
2378
2379         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0u));
2380
2381         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device));
2382         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2383
2384         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2385         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2386
2387         // Start recording commands
2388
2389         beginCommandBuffer(vk, *cmdBuffer);
2390
2391         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2392
2393         const tcu::IVec3 workGroups(1, 1, 1);
2394         vk.cmdDispatch(*cmdBuffer, workGroups.x(), workGroups.y(), workGroups.z());
2395
2396         endCommandBuffer(vk, *cmdBuffer);
2397
2398         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2399
2400         return tcu::TestStatus::pass("Compute succeeded");
2401 }
2402
2403 } // EmptyShaderTest ns
2404 } // anonymous
2405
2406 tcu::TestCaseGroup* createBasicComputeShaderTests (tcu::TestContext& testCtx)
2407 {
2408         de::MovePtr<tcu::TestCaseGroup> basicComputeTests(new tcu::TestCaseGroup(testCtx, "basic", "Basic compute tests"));
2409
2410         addFunctionCaseWithPrograms(basicComputeTests.get(), "empty_shader", "Shader that does nothing", EmptyShaderTest::createProgram, EmptyShaderTest::createTest);
2411
2412         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_single_invocation",        "Copy from UBO to SSBO, inverting bits",        256,    tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2413         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_single_group",                     "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(2,1,4),      tcu::IVec3(1,1,1)));
2414         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_multiple_invocations",     "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(1,1,1),      tcu::IVec3(2,4,1)));
2415         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_multiple_groups",          "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2416
2417         basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,       "copy_ssbo_single_invocation",          "Copy between SSBOs, inverting bits",   256,    tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2418         basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,       "copy_ssbo_multiple_invocations",       "Copy between SSBOs, inverting bits",   1024,   tcu::IVec3(1,1,1),      tcu::IVec3(2,4,1)));
2419         basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,       "copy_ssbo_multiple_groups",            "Copy between SSBOs, inverting bits",   1024,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2420
2421         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_rw_single_invocation",                    "Read and write same SSBO",             256,    true,   tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2422         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_rw_multiple_groups",                              "Read and write same SSBO",             1024,   true,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2423         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_unsized_arr_single_invocation",   "Read and write same SSBO",             256,    false,  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2424         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_unsized_arr_multiple_groups",             "Read and write same SSBO",             1024,   false,  tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2425
2426         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_arr_single_invocation",                 "Write to multiple SSBOs",      256,    true,   tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2427         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_arr_multiple_groups",                   "Write to multiple SSBOs",      1024,   true,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2428         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_unsized_arr_single_invocation", "Write to multiple SSBOs",      256,    false,  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2429         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_unsized_arr_multiple_groups",   "Write to multiple SSBOs",      1024,   false,  tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2430
2431         basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,   "ssbo_local_barrier_single_invocation", "SSBO local barrier usage",     tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2432         basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,   "ssbo_local_barrier_single_group",              "SSBO local barrier usage",     tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
2433         basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,   "ssbo_local_barrier_multiple_groups",   "SSBO local barrier usage",     tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
2434
2435         basicComputeTests->addChild(new SSBOBarrierTest(testCtx,        "ssbo_cmd_barrier_single",              "SSBO memory barrier usage",    tcu::IVec3(1,1,1)));
2436         basicComputeTests->addChild(new SSBOBarrierTest(testCtx,        "ssbo_cmd_barrier_multiple",    "SSBO memory barrier usage",    tcu::IVec3(11,5,7)));
2437
2438         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_single_invocation",         "Basic shared variable usage",  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2439         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_single_group",                      "Basic shared variable usage",  tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
2440         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_multiple_invocations",      "Basic shared variable usage",  tcu::IVec3(1,1,1),      tcu::IVec3(2,5,4)));
2441         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_multiple_groups",           "Basic shared variable usage",  tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
2442
2443         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_single_invocation",           "Atomic operation with shared var",             tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2444         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_single_group",                        "Atomic operation with shared var",             tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
2445         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_multiple_invocations",        "Atomic operation with shared var",             tcu::IVec3(1,1,1),      tcu::IVec3(2,5,4)));
2446         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_multiple_groups",                     "Atomic operation with shared var",             tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
2447
2448         basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx,    "copy_image_to_ssbo_small",     "Image to SSBO copy",   tcu::IVec2(1,1),        tcu::IVec2(64,64)));
2449         basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx,    "copy_image_to_ssbo_large",     "Image to SSBO copy",   tcu::IVec2(2,4),        tcu::IVec2(512,512)));
2450
2451         basicComputeTests->addChild(new CopySSBOToImageTest(testCtx,    "copy_ssbo_to_image_small",     "SSBO to image copy",   tcu::IVec2(1, 1),       tcu::IVec2(64, 64)));
2452         basicComputeTests->addChild(new CopySSBOToImageTest(testCtx,    "copy_ssbo_to_image_large",     "SSBO to image copy",   tcu::IVec2(2, 4),       tcu::IVec2(512, 512)));
2453
2454         basicComputeTests->addChild(new ImageAtomicOpTest(testCtx,      "image_atomic_op_local_size_1", "Atomic operation with image",  1,      tcu::IVec2(64,64)));
2455         basicComputeTests->addChild(new ImageAtomicOpTest(testCtx,      "image_atomic_op_local_size_8", "Atomic operation with image",  8,      tcu::IVec2(64,64)));
2456
2457         basicComputeTests->addChild(new ImageBarrierTest(testCtx,       "image_barrier_single",         "Image barrier",        tcu::IVec2(1,1)));
2458         basicComputeTests->addChild(new ImageBarrierTest(testCtx,       "image_barrier_multiple",       "Image barrier",        tcu::IVec2(64,64)));
2459
2460         return basicComputeTests.release();
2461 }
2462
2463 } // compute
2464 } // vkt