Remove GLES2 texture unit tests from the mustpass. am: 0678398f7a
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / compute / vktComputeBasicComputeShaderTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  * Copyright (c) 2016 The Android Open Source Project
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and/or associated documentation files (the
10  * "Materials"), to deal in the Materials without restriction, including
11  * without limitation the rights to use, copy, modify, merge, publish,
12  * distribute, sublicense, and/or sell copies of the Materials, and to
13  * permit persons to whom the Materials are furnished to do so, subject to
14  * the following conditions:
15  *
16  * The above copyright notice(s) and this permission notice shall be included
17  * in all copies or substantial portions of the Materials.
18  *
19  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
26  *
27  *//*!
28  * \file
29  * \brief Compute Shader Tests
30  *//*--------------------------------------------------------------------*/
31
32 #include "vktComputeBasicComputeShaderTests.hpp"
33 #include "vktTestCase.hpp"
34 #include "vktTestCaseUtil.hpp"
35 #include "vktComputeTestsUtil.hpp"
36
37 #include "vkDefs.hpp"
38 #include "vkRef.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkPlatform.hpp"
41 #include "vkPrograms.hpp"
42 #include "vkRefUtil.hpp"
43 #include "vkMemUtil.hpp"
44 #include "vkQueryUtil.hpp"
45 #include "vkBuilderUtil.hpp"
46 #include "vkTypeUtil.hpp"
47
48 #include "deStringUtil.hpp"
49 #include "deUniquePtr.hpp"
50 #include "deRandom.hpp"
51
52 #include <vector>
53
54 using namespace vk;
55
56 namespace vkt
57 {
58 namespace compute
59 {
60 namespace
61 {
62
63 template<typename T, int size>
64 T multiplyComponents (const tcu::Vector<T, size>& v)
65 {
66         T accum = 1;
67         for (int i = 0; i < size; ++i)
68                 accum *= v[i];
69         return accum;
70 }
71
//! Returns \p a multiplied by itself.
template<typename T>
inline T squared (const T& a)
{
	const T result = a * a;
	return result;
}
77
78 inline VkImageCreateInfo make2DImageCreateInfo (const tcu::IVec2& imageSize, const VkImageUsageFlags usage)
79 {
80         const VkImageCreateInfo imageParams =
81         {
82                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                            // VkStructureType                      sType;
83                 DE_NULL,                                                                                        // const void*                          pNext;
84                 0u,                                                                                                     // VkImageCreateFlags           flags;
85                 VK_IMAGE_TYPE_2D,                                                                       // VkImageType                          imageType;
86                 VK_FORMAT_R32_UINT,                                                                     // VkFormat                                     format;
87                 vk::makeExtent3D(imageSize.x(), imageSize.y(), 1),      // VkExtent3D                           extent;
88                 1u,                                                                                                     // deUint32                                     mipLevels;
89                 1u,                                                                                                     // deUint32                                     arrayLayers;
90                 VK_SAMPLE_COUNT_1_BIT,                                                          // VkSampleCountFlagBits        samples;
91                 VK_IMAGE_TILING_OPTIMAL,                                                        // VkImageTiling                        tiling;
92                 usage,                                                                                          // VkImageUsageFlags            usage;
93                 VK_SHARING_MODE_EXCLUSIVE,                                                      // VkSharingMode                        sharingMode;
94                 0u,                                                                                                     // deUint32                                     queueFamilyIndexCount;
95                 DE_NULL,                                                                                        // const deUint32*                      pQueueFamilyIndices;
96                 VK_IMAGE_LAYOUT_UNDEFINED,                                                      // VkImageLayout                        initialLayout;
97         };
98         return imageParams;
99 }
100
101 inline VkBufferImageCopy makeBufferImageCopy(const tcu::IVec2& imageSize)
102 {
103         return compute::makeBufferImageCopy(vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), 1u);
104 }
105
//! Kind of buffer a test operates on. Not referenced in the visible part of
//! the file; presumably selects between a uniform buffer and a shader
//! storage buffer in tests defined further down -- confirm at the use site.
enum BufferType
{
	BUFFER_TYPE_UNIFORM	= 0,
	BUFFER_TYPE_SSBO	= 1,
};
111
112 class SharedVarTest : public vkt::TestCase
113 {
114 public:
115                                                 SharedVarTest   (tcu::TestContext&              testCtx,
116                                                                                  const std::string&             name,
117                                                                                  const std::string&             description,
118                                                                                  const tcu::IVec3&              localSize,
119                                                                                  const tcu::IVec3&              workSize);
120
121         void                            initPrograms    (SourceCollections&             sourceCollections) const;
122         TestInstance*           createInstance  (Context&                               context) const;
123
124 private:
125         const tcu::IVec3        m_localSize;
126         const tcu::IVec3        m_workSize;
127 };
128
129 class SharedVarTestInstance : public vkt::TestInstance
130 {
131 public:
132                                                                         SharedVarTestInstance   (Context&                       context,
133                                                                                                                          const tcu::IVec3&      localSize,
134                                                                                                                          const tcu::IVec3&      workSize);
135
136         tcu::TestStatus                                 iterate                                 (void);
137
138 private:
139         const tcu::IVec3                                m_localSize;
140         const tcu::IVec3                                m_workSize;
141 };
142
143 SharedVarTest::SharedVarTest (tcu::TestContext&         testCtx,
144                                                           const std::string&    name,
145                                                           const std::string&    description,
146                                                           const tcu::IVec3&             localSize,
147                                                           const tcu::IVec3&             workSize)
148         : TestCase              (testCtx, name, description)
149         , m_localSize   (localSize)
150         , m_workSize    (workSize)
151 {
152 }
153
154 void SharedVarTest::initPrograms (SourceCollections& sourceCollections) const
155 {
156         const int workGroupSize = multiplyComponents(m_localSize);
157         const int workGroupCount = multiplyComponents(m_workSize);
158         const int numValues = workGroupSize * workGroupCount;
159
160         std::ostringstream src;
161         src << "#version 310 es\n"
162                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
163                 << "layout(binding = 0) writeonly buffer Output {\n"
164                 << "    uint values[" << numValues << "];\n"
165                 << "} sb_out;\n\n"
166                 << "shared uint offsets[" << workGroupSize << "];\n\n"
167                 << "void main (void) {\n"
168                 << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
169                 << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
170                 << "    uint globalOffs = localSize*globalNdx;\n"
171                 << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
172                 << "\n"
173                 << "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
174                 << "    memoryBarrierShared();\n"
175                 << "    barrier();\n"
176                 << "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
177                 << "}\n";
178
179         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
180 }
181
182 TestInstance* SharedVarTest::createInstance (Context& context) const
183 {
184         return new SharedVarTestInstance(context, m_localSize, m_workSize);
185 }
186
187 SharedVarTestInstance::SharedVarTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
188         : TestInstance  (context)
189         , m_localSize   (localSize)
190         , m_workSize    (workSize)
191 {
192 }
193
194 tcu::TestStatus SharedVarTestInstance::iterate (void)
195 {
196         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
197         const VkDevice                  device                          = m_context.getDevice();
198         const VkQueue                   queue                           = m_context.getUniversalQueue();
199         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
200         Allocator&                              allocator                       = m_context.getDefaultAllocator();
201
202         const int workGroupSize = multiplyComponents(m_localSize);
203         const int workGroupCount = multiplyComponents(m_workSize);
204
205         // Create a buffer and host-visible memory for it
206
207         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
208         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
209
210         // Create descriptor set
211
212         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
213                 DescriptorSetLayoutBuilder()
214                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
215                 .build(vk, device));
216
217         const Unique<VkDescriptorPool> descriptorPool(
218                 DescriptorPoolBuilder()
219                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
220                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
221
222         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
223
224         const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
225         DescriptorSetUpdateBuilder()
226                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
227                 .update(vk, device);
228
229         // Perform the computation
230
231         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
232         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
233         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
234
235         const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
236
237         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
238         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
239
240         // Start recording commands
241
242         beginCommandBuffer(vk, *cmdBuffer);
243
244         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
245         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
246
247         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
248
249         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
250
251         endCommandBuffer(vk, *cmdBuffer);
252
253         // Wait for completion
254
255         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
256
257         // Validate the results
258
259         const Allocation& bufferAllocation = buffer.getAllocation();
260         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
261
262         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
263
264         for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
265         {
266                 const int globalOffset = groupNdx * workGroupSize;
267                 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
268                 {
269                         const deUint32 res = bufferPtr[globalOffset + localOffset];
270                         const deUint32 ref = globalOffset + squared(workGroupSize - localOffset - 1);
271
272                         if (res != ref)
273                         {
274                                 std::ostringstream msg;
275                                 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
276                                 return tcu::TestStatus::fail(msg.str());
277                         }
278                 }
279         }
280         return tcu::TestStatus::pass("Compute succeeded");
281 }
282
283 class SharedVarAtomicOpTest : public vkt::TestCase
284 {
285 public:
286                                                 SharedVarAtomicOpTest   (tcu::TestContext&      testCtx,
287                                                                                                  const std::string&     name,
288                                                                                                  const std::string&     description,
289                                                                                                  const tcu::IVec3&      localSize,
290                                                                                                  const tcu::IVec3&      workSize);
291
292         void                            initPrograms                    (SourceCollections& sourceCollections) const;
293         TestInstance*           createInstance                  (Context&                       context) const;
294
295 private:
296         const tcu::IVec3        m_localSize;
297         const tcu::IVec3        m_workSize;
298 };
299
300 class SharedVarAtomicOpTestInstance : public vkt::TestInstance
301 {
302 public:
303                                                                         SharedVarAtomicOpTestInstance   (Context&                       context,
304                                                                                                                                          const tcu::IVec3&      localSize,
305                                                                                                                                          const tcu::IVec3&      workSize);
306
307         tcu::TestStatus                                 iterate                                                 (void);
308
309 private:
310         const tcu::IVec3                                m_localSize;
311         const tcu::IVec3                                m_workSize;
312 };
313
314 SharedVarAtomicOpTest::SharedVarAtomicOpTest (tcu::TestContext&         testCtx,
315                                                                                           const std::string&    name,
316                                                                                           const std::string&    description,
317                                                                                           const tcu::IVec3&             localSize,
318                                                                                           const tcu::IVec3&             workSize)
319         : TestCase              (testCtx, name, description)
320         , m_localSize   (localSize)
321         , m_workSize    (workSize)
322 {
323 }
324
325 void SharedVarAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
326 {
327         const int workGroupSize = multiplyComponents(m_localSize);
328         const int workGroupCount = multiplyComponents(m_workSize);
329         const int numValues = workGroupSize * workGroupCount;
330
331         std::ostringstream src;
332         src << "#version 310 es\n"
333                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
334                 << "layout(binding = 0) writeonly buffer Output {\n"
335                 << "    uint values[" << numValues << "];\n"
336                 << "} sb_out;\n\n"
337                 << "shared uint count;\n\n"
338                 << "void main (void) {\n"
339                 << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
340                 << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
341                 << "    uint globalOffs = localSize*globalNdx;\n"
342                 << "\n"
343                 << "    count = 0u;\n"
344                 << "    memoryBarrierShared();\n"
345                 << "    barrier();\n"
346                 << "    uint oldVal = atomicAdd(count, 1u);\n"
347                 << "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
348                 << "}\n";
349
350         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
351 }
352
353 TestInstance* SharedVarAtomicOpTest::createInstance (Context& context) const
354 {
355         return new SharedVarAtomicOpTestInstance(context, m_localSize, m_workSize);
356 }
357
358 SharedVarAtomicOpTestInstance::SharedVarAtomicOpTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
359         : TestInstance  (context)
360         , m_localSize   (localSize)
361         , m_workSize    (workSize)
362 {
363 }
364
365 tcu::TestStatus SharedVarAtomicOpTestInstance::iterate (void)
366 {
367         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
368         const VkDevice                  device                          = m_context.getDevice();
369         const VkQueue                   queue                           = m_context.getUniversalQueue();
370         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
371         Allocator&                              allocator                       = m_context.getDefaultAllocator();
372
373         const int workGroupSize = multiplyComponents(m_localSize);
374         const int workGroupCount = multiplyComponents(m_workSize);
375
376         // Create a buffer and host-visible memory for it
377
378         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
379         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
380
381         // Create descriptor set
382
383         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
384                 DescriptorSetLayoutBuilder()
385                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
386                 .build(vk, device));
387
388         const Unique<VkDescriptorPool> descriptorPool(
389                 DescriptorPoolBuilder()
390                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
391                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
392
393         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
394
395         const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
396         DescriptorSetUpdateBuilder()
397                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
398                 .update(vk, device);
399
400         // Perform the computation
401
402         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
403         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
404         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
405
406         const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
407
408         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
409         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
410
411         // Start recording commands
412
413         beginCommandBuffer(vk, *cmdBuffer);
414
415         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
416         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
417
418         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
419
420         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1u, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
421
422         endCommandBuffer(vk, *cmdBuffer);
423
424         // Wait for completion
425
426         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
427
428         // Validate the results
429
430         const Allocation& bufferAllocation = buffer.getAllocation();
431         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
432
433         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
434
435         for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
436         {
437                 const int globalOffset = groupNdx * workGroupSize;
438                 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
439                 {
440                         const deUint32 res = bufferPtr[globalOffset + localOffset];
441                         const deUint32 ref = localOffset + 1;
442
443                         if (res != ref)
444                         {
445                                 std::ostringstream msg;
446                                 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
447                                 return tcu::TestStatus::fail(msg.str());
448                         }
449                 }
450         }
451         return tcu::TestStatus::pass("Compute succeeded");
452 }
453
454 class SSBOLocalBarrierTest : public vkt::TestCase
455 {
456 public:
457                                                 SSBOLocalBarrierTest    (tcu::TestContext&      testCtx,
458                                                                                                  const std::string& name,
459                                                                                                  const std::string&     description,
460                                                                                                  const tcu::IVec3&      localSize,
461                                                                                                  const tcu::IVec3&      workSize);
462
463         void                            initPrograms                    (SourceCollections& sourceCollections) const;
464         TestInstance*           createInstance                  (Context&                       context) const;
465
466 private:
467         const tcu::IVec3        m_localSize;
468         const tcu::IVec3        m_workSize;
469 };
470
471 class SSBOLocalBarrierTestInstance : public vkt::TestInstance
472 {
473 public:
474                                                                         SSBOLocalBarrierTestInstance    (Context&                       context,
475                                                                                                                                          const tcu::IVec3&      localSize,
476                                                                                                                                          const tcu::IVec3&      workSize);
477
478         tcu::TestStatus                                 iterate                                                 (void);
479
480 private:
481         const tcu::IVec3                                m_localSize;
482         const tcu::IVec3                                m_workSize;
483 };
484
485 SSBOLocalBarrierTest::SSBOLocalBarrierTest (tcu::TestContext&   testCtx,
486                                                                                         const std::string&      name,
487                                                                                         const std::string&      description,
488                                                                                         const tcu::IVec3&       localSize,
489                                                                                         const tcu::IVec3&       workSize)
490         : TestCase              (testCtx, name, description)
491         , m_localSize   (localSize)
492         , m_workSize    (workSize)
493 {
494 }
495
496 void SSBOLocalBarrierTest::initPrograms (SourceCollections& sourceCollections) const
497 {
498         const int workGroupSize = multiplyComponents(m_localSize);
499         const int workGroupCount = multiplyComponents(m_workSize);
500         const int numValues = workGroupSize * workGroupCount;
501
502         std::ostringstream src;
503         src << "#version 310 es\n"
504                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
505                 << "layout(binding = 0) coherent buffer Output {\n"
506                 << "    uint values[" << numValues << "];\n"
507                 << "} sb_out;\n\n"
508                 << "void main (void) {\n"
509                 << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
510                 << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
511                 << "    uint globalOffs = localSize*globalNdx;\n"
512                 << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
513                 << "\n"
514                 << "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
515                 << "    memoryBarrierBuffer();\n"
516                 << "    barrier();\n"
517                 << "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n"         // += so we read and write
518                 << "    memoryBarrierBuffer();\n"
519                 << "    barrier();\n"
520                 << "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
521                 << "}\n";
522
523         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
524 }
525
526 TestInstance* SSBOLocalBarrierTest::createInstance (Context& context) const
527 {
528         return new SSBOLocalBarrierTestInstance(context, m_localSize, m_workSize);
529 }
530
531 SSBOLocalBarrierTestInstance::SSBOLocalBarrierTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
532         : TestInstance  (context)
533         , m_localSize   (localSize)
534         , m_workSize    (workSize)
535 {
536 }
537
538 tcu::TestStatus SSBOLocalBarrierTestInstance::iterate (void)
539 {
540         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
541         const VkDevice                  device                          = m_context.getDevice();
542         const VkQueue                   queue                           = m_context.getUniversalQueue();
543         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
544         Allocator&                              allocator                       = m_context.getDefaultAllocator();
545
546         const int workGroupSize = multiplyComponents(m_localSize);
547         const int workGroupCount = multiplyComponents(m_workSize);
548
549         // Create a buffer and host-visible memory for it
550
551         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
552         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
553
554         // Create descriptor set
555
556         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
557                 DescriptorSetLayoutBuilder()
558                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
559                 .build(vk, device));
560
561         const Unique<VkDescriptorPool> descriptorPool(
562                 DescriptorPoolBuilder()
563                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
564                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
565
566         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
567
568         const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
569         DescriptorSetUpdateBuilder()
570                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
571                 .update(vk, device);
572
573         // Perform the computation
574
575         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
576         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
577         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
578
579         const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
580
581         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
582         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
583
584         // Start recording commands
585
586         beginCommandBuffer(vk, *cmdBuffer);
587
588         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
589         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
590
591         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
592
593         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
594
595         endCommandBuffer(vk, *cmdBuffer);
596
597         // Wait for completion
598
599         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
600
601         // Validate the results
602
603         const Allocation& bufferAllocation = buffer.getAllocation();
604         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
605
606         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
607
608         for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
609         {
610                 const int globalOffset = groupNdx * workGroupSize;
611                 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
612                 {
613                         const deUint32  res             = bufferPtr[globalOffset + localOffset];
614                         const int               offs0   = localOffset - 1 < 0 ? ((localOffset + workGroupSize - 1) % workGroupSize) : ((localOffset - 1) % workGroupSize);
615                         const int               offs1   = localOffset - 2 < 0 ? ((localOffset + workGroupSize - 2) % workGroupSize) : ((localOffset - 2) % workGroupSize);
616                         const deUint32  ref             = static_cast<deUint32>(globalOffset + offs0 + offs1);
617
618                         if (res != ref)
619                         {
620                                 std::ostringstream msg;
621                                 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
622                                 return tcu::TestStatus::fail(msg.str());
623                         }
624                 }
625         }
626         return tcu::TestStatus::pass("Compute succeeded");
627 }
628
629 class CopyImageToSSBOTest : public vkt::TestCase
630 {
631 public:
632                                                 CopyImageToSSBOTest             (tcu::TestContext&      testCtx,
633                                                                                                  const std::string&     name,
634                                                                                                  const std::string&     description,
635                                                                                                  const tcu::IVec2&      localSize,
636                                                                                                  const tcu::IVec2&      imageSize);
637
638         void                            initPrograms                    (SourceCollections& sourceCollections) const;
639         TestInstance*           createInstance                  (Context&                       context) const;
640
641 private:
642         const tcu::IVec2        m_localSize;
643         const tcu::IVec2        m_imageSize;
644 };
645
646 class CopyImageToSSBOTestInstance : public vkt::TestInstance
647 {
648 public:
649                                                                         CopyImageToSSBOTestInstance             (Context&                       context,
650                                                                                                                                          const tcu::IVec2&      localSize,
651                                                                                                                                          const tcu::IVec2&      imageSize);
652
653         tcu::TestStatus                                 iterate                                                 (void);
654
655 private:
656         const tcu::IVec2                                m_localSize;
657         const tcu::IVec2                                m_imageSize;
658 };
659
660 CopyImageToSSBOTest::CopyImageToSSBOTest (tcu::TestContext&             testCtx,
661                                                                                   const std::string&    name,
662                                                                                   const std::string&    description,
663                                                                                   const tcu::IVec2&             localSize,
664                                                                                   const tcu::IVec2&             imageSize)
665         : TestCase              (testCtx, name, description)
666         , m_localSize   (localSize)
667         , m_imageSize   (imageSize)
668 {
669         DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
670         DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
671 }
672
673 void CopyImageToSSBOTest::initPrograms (SourceCollections& sourceCollections) const
674 {
675         std::ostringstream src;
676         src << "#version 310 es\n"
677                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
678                 << "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_srcImg;\n"
679                 << "layout(binding = 0) writeonly buffer Output {\n"
680                 << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
681                 << "} sb_out;\n\n"
682                 << "void main (void) {\n"
683                 << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
684                 << "    uint value  = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
685                 << "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
686                 << "}\n";
687
688         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
689 }
690
691 TestInstance* CopyImageToSSBOTest::createInstance (Context& context) const
692 {
693         return new CopyImageToSSBOTestInstance(context, m_localSize, m_imageSize);
694 }
695
696 CopyImageToSSBOTestInstance::CopyImageToSSBOTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
697         : TestInstance  (context)
698         , m_localSize   (localSize)
699         , m_imageSize   (imageSize)
700 {
701 }
702
703 tcu::TestStatus CopyImageToSSBOTestInstance::iterate (void)
704 {
705         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
706         const VkDevice                  device                          = m_context.getDevice();
707         const VkQueue                   queue                           = m_context.getUniversalQueue();
708         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
709         Allocator&                              allocator                       = m_context.getDefaultAllocator();
710
711         // Create an image
712
713         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
714         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
715
716         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
717         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
718
719         // Staging buffer (source data for image)
720
721         const deUint32 imageArea = multiplyComponents(m_imageSize);
722         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
723
724         const Buffer stagingBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
725
726         // Populate the staging buffer with test data
727         {
728                 de::Random rnd(0xab2c7);
729                 const Allocation& stagingBufferAllocation = stagingBuffer.getAllocation();
730                 deUint32* bufferPtr = static_cast<deUint32*>(stagingBufferAllocation.getHostPtr());
731                 for (deUint32 i = 0; i < imageArea; ++i)
732                         *bufferPtr++ = rnd.getUint32();
733
734                 flushMappedMemoryRange(vk, device, stagingBufferAllocation.getMemory(), stagingBufferAllocation.getOffset(), bufferSizeBytes);
735         }
736
737         // Create a buffer to store shader output
738
739         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
740
741         // Create descriptor set
742
743         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
744                 DescriptorSetLayoutBuilder()
745                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
746                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
747                 .build(vk, device));
748
749         const Unique<VkDescriptorPool> descriptorPool(
750                 DescriptorPoolBuilder()
751                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
752                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
753                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
754
755         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
756
757         // Set the bindings
758
759         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
760         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
761
762         DescriptorSetUpdateBuilder()
763                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
764                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
765                 .update(vk, device);
766
767         // Perform the computation
768         {
769                 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
770                 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
771                 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
772
773                 const VkBufferMemoryBarrier stagingBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *stagingBuffer, 0ull, bufferSizeBytes);
774
775                 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
776                         0u, 0u,
777                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
778                         *image, subresourceRange);
779
780                 const VkImageMemoryBarrier imagePostCopyBarrier = makeImageMemoryBarrier(
781                         VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
782                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
783                         *image, subresourceRange);
784
785                 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
786
787                 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
788                 const tcu::IVec2 workSize = m_imageSize / m_localSize;
789
790                 // Prepare the command buffer
791
792                 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
793                 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
794
795                 // Start recording commands
796
797                 beginCommandBuffer(vk, *cmdBuffer);
798
799                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
800                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
801
802                 vk.cmdPipelineBarrier(*cmdBuffer, 0u, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &stagingBufferPostHostWriteBarrier, 1, &imagePreCopyBarrier);
803                 vk.cmdCopyBufferToImage(*cmdBuffer, *stagingBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &copyParams);
804                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePostCopyBarrier);
805
806                 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
807                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
808
809                 endCommandBuffer(vk, *cmdBuffer);
810
811                 // Wait for completion
812
813                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
814         }
815
816         // Validate the results
817
818         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
819         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
820
821         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
822         const deUint32* refBufferPtr = static_cast<deUint32*>(stagingBuffer.getAllocation().getHostPtr());
823
824         for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
825         {
826                 const deUint32 res = *(bufferPtr + ndx);
827                 const deUint32 ref = *(refBufferPtr + ndx);
828
829                 if (res != ref)
830                 {
831                         std::ostringstream msg;
832                         msg << "Comparison failed for Output.values[" << ndx << "]";
833                         return tcu::TestStatus::fail(msg.str());
834                 }
835         }
836         return tcu::TestStatus::pass("Compute succeeded");
837 }
838
839 class CopySSBOToImageTest : public vkt::TestCase
840 {
841 public:
842                                                 CopySSBOToImageTest     (tcu::TestContext&      testCtx,
843                                                                                          const std::string&     name,
844                                                                                          const std::string&     description,
845                                                                                          const tcu::IVec2&      localSize,
846                                                                                          const tcu::IVec2&      imageSize);
847
848         void                            initPrograms            (SourceCollections& sourceCollections) const;
849         TestInstance*           createInstance          (Context&                       context) const;
850
851 private:
852         const tcu::IVec2        m_localSize;
853         const tcu::IVec2        m_imageSize;
854 };
855
856 class CopySSBOToImageTestInstance : public vkt::TestInstance
857 {
858 public:
859                                                                         CopySSBOToImageTestInstance     (Context&                       context,
860                                                                                                                                  const tcu::IVec2&      localSize,
861                                                                                                                                  const tcu::IVec2&      imageSize);
862
863         tcu::TestStatus                                 iterate                                         (void);
864
865 private:
866         const tcu::IVec2                                m_localSize;
867         const tcu::IVec2                                m_imageSize;
868 };
869
870 CopySSBOToImageTest::CopySSBOToImageTest (tcu::TestContext&             testCtx,
871                                                                                   const std::string&    name,
872                                                                                   const std::string&    description,
873                                                                                   const tcu::IVec2&             localSize,
874                                                                                   const tcu::IVec2&             imageSize)
875         : TestCase              (testCtx, name, description)
876         , m_localSize   (localSize)
877         , m_imageSize   (imageSize)
878 {
879         DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
880         DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
881 }
882
883 void CopySSBOToImageTest::initPrograms (SourceCollections& sourceCollections) const
884 {
885         std::ostringstream src;
886         src << "#version 310 es\n"
887                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
888                 << "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_dstImg;\n"
889                 << "layout(binding = 0) readonly buffer Input {\n"
890                 << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
891                 << "} sb_in;\n\n"
892                 << "void main (void) {\n"
893                 << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
894                 << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
895                 << "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
896                 << "}\n";
897
898         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
899 }
900
901 TestInstance* CopySSBOToImageTest::createInstance (Context& context) const
902 {
903         return new CopySSBOToImageTestInstance(context, m_localSize, m_imageSize);
904 }
905
906 CopySSBOToImageTestInstance::CopySSBOToImageTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
907         : TestInstance  (context)
908         , m_localSize   (localSize)
909         , m_imageSize   (imageSize)
910 {
911 }
912
913 tcu::TestStatus CopySSBOToImageTestInstance::iterate (void)
914 {
915         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
916         const VkDevice                  device                          = m_context.getDevice();
917         const VkQueue                   queue                           = m_context.getUniversalQueue();
918         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
919         Allocator&                              allocator                       = m_context.getDefaultAllocator();
920
921         // Create an image
922
923         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
924         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
925
926         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
927         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
928
929         // Create an input buffer (data to be read in the shader)
930
931         const deUint32 imageArea = multiplyComponents(m_imageSize);
932         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
933
934         const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
935
936         // Populate the buffer with test data
937         {
938                 de::Random rnd(0x77238ac2);
939                 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
940                 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
941                 for (deUint32 i = 0; i < imageArea; ++i)
942                         *bufferPtr++ = rnd.getUint32();
943
944                 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
945         }
946
947         // Create a buffer to store shader output (copied from image data)
948
949         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
950
951         // Create descriptor set
952
953         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
954                 DescriptorSetLayoutBuilder()
955                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
956                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
957                 .build(vk, device));
958
959         const Unique<VkDescriptorPool> descriptorPool(
960                 DescriptorPoolBuilder()
961                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
962                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
963                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
964
965         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
966
967         // Set the bindings
968
969         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
970         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
971
972         DescriptorSetUpdateBuilder()
973                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
974                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
975                 .update(vk, device);
976
977         // Perform the computation
978         {
979                 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
980                 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
981                 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
982
983                 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
984
985                 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
986                         0u, 0u,
987                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
988                         *image, subresourceRange);
989
990                 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
991                         VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
992                         VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
993                         *image, subresourceRange);
994
995                 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
996
997                 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
998                 const tcu::IVec2 workSize = m_imageSize / m_localSize;
999
1000                 // Prepare the command buffer
1001
1002                 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1003                 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1004
1005                 // Start recording commands
1006
1007                 beginCommandBuffer(vk, *cmdBuffer);
1008
1009                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1010                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1011
1012                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
1013                 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
1014
1015                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
1016                 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &copyParams);
1017                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1018
1019                 endCommandBuffer(vk, *cmdBuffer);
1020
1021                 // Wait for completion
1022
1023                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1024         }
1025
1026         // Validate the results
1027
1028         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1029         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1030
1031         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1032         const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
1033
1034         for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
1035         {
1036                 const deUint32 res = *(bufferPtr + ndx);
1037                 const deUint32 ref = *(refBufferPtr + ndx);
1038
1039                 if (res != ref)
1040                 {
1041                         std::ostringstream msg;
1042                         msg << "Comparison failed for pixel " << ndx;
1043                         return tcu::TestStatus::fail(msg.str());
1044                 }
1045         }
1046         return tcu::TestStatus::pass("Compute succeeded");
1047 }
1048
1049 class BufferToBufferInvertTest : public vkt::TestCase
1050 {
1051 public:
1052         void                                                            initPrograms                            (SourceCollections&     sourceCollections) const;
1053         TestInstance*                                           createInstance                          (Context&                       context) const;
1054
1055         static BufferToBufferInvertTest*        UBOToSSBOInvertCase                     (tcu::TestContext&      testCtx,
1056                                                                                                                                          const std::string& name,
1057                                                                                                                                          const std::string& description,
1058                                                                                                                                          const deUint32         numValues,
1059                                                                                                                                          const tcu::IVec3&      localSize,
1060                                                                                                                                          const tcu::IVec3&      workSize);
1061
1062         static BufferToBufferInvertTest*        CopyInvertSSBOCase                      (tcu::TestContext&      testCtx,
1063                                                                                                                                          const std::string& name,
1064                                                                                                                                          const std::string& description,
1065                                                                                                                                          const deUint32         numValues,
1066                                                                                                                                          const tcu::IVec3&      localSize,
1067                                                                                                                                          const tcu::IVec3&      workSize);
1068
1069 private:
1070                                                                                 BufferToBufferInvertTest        (tcu::TestContext&      testCtx,
1071                                                                                                                                          const std::string& name,
1072                                                                                                                                          const std::string& description,
1073                                                                                                                                          const deUint32         numValues,
1074                                                                                                                                          const tcu::IVec3&      localSize,
1075                                                                                                                                          const tcu::IVec3&      workSize,
1076                                                                                                                                          const BufferType       bufferType);
1077
1078         const BufferType                                        m_bufferType;
1079         const deUint32                                          m_numValues;
1080         const tcu::IVec3                                        m_localSize;
1081         const tcu::IVec3                                        m_workSize;
1082 };
1083
1084 class BufferToBufferInvertTestInstance : public vkt::TestInstance
1085 {
1086 public:
1087                                                                         BufferToBufferInvertTestInstance        (Context&                       context,
1088                                                                                                                                                  const deUint32         numValues,
1089                                                                                                                                                  const tcu::IVec3&      localSize,
1090                                                                                                                                                  const tcu::IVec3&      workSize,
1091                                                                                                                                                  const BufferType       bufferType);
1092
1093         tcu::TestStatus                                 iterate                                                         (void);
1094
1095 private:
1096         const BufferType                                m_bufferType;
1097         const deUint32                                  m_numValues;
1098         const tcu::IVec3                                m_localSize;
1099         const tcu::IVec3                                m_workSize;
1100 };
1101
1102 BufferToBufferInvertTest::BufferToBufferInvertTest (tcu::TestContext&   testCtx,
1103                                                                                                         const std::string&      name,
1104                                                                                                         const std::string&      description,
1105                                                                                                         const deUint32          numValues,
1106                                                                                                         const tcu::IVec3&       localSize,
1107                                                                                                         const tcu::IVec3&       workSize,
1108                                                                                                         const BufferType        bufferType)
1109         : TestCase              (testCtx, name, description)
1110         , m_bufferType  (bufferType)
1111         , m_numValues   (numValues)
1112         , m_localSize   (localSize)
1113         , m_workSize    (workSize)
1114 {
1115         DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1116         DE_ASSERT(m_bufferType == BUFFER_TYPE_UNIFORM || m_bufferType == BUFFER_TYPE_SSBO);
1117 }
1118
1119 BufferToBufferInvertTest* BufferToBufferInvertTest::UBOToSSBOInvertCase (tcu::TestContext&      testCtx,
1120                                                                                                                                                  const std::string&     name,
1121                                                                                                                                                  const std::string&     description,
1122                                                                                                                                                  const deUint32         numValues,
1123                                                                                                                                                  const tcu::IVec3&      localSize,
1124                                                                                                                                                  const tcu::IVec3&      workSize)
1125 {
1126         return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_UNIFORM);
1127 }
1128
1129 BufferToBufferInvertTest* BufferToBufferInvertTest::CopyInvertSSBOCase (tcu::TestContext&       testCtx,
1130                                                                                                                                                 const std::string&      name,
1131                                                                                                                                                 const std::string&      description,
1132                                                                                                                                                 const deUint32          numValues,
1133                                                                                                                                                 const tcu::IVec3&       localSize,
1134                                                                                                                                                 const tcu::IVec3&       workSize)
1135 {
1136         return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_SSBO);
1137 }
1138
1139 void BufferToBufferInvertTest::initPrograms (SourceCollections& sourceCollections) const
1140 {
1141         std::ostringstream src;
1142         if (m_bufferType == BUFFER_TYPE_UNIFORM)
1143         {
1144                 src << "#version 310 es\n"
1145                         << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1146                         << "layout(binding = 0) readonly uniform Input {\n"
1147                         << "    uint values[" << m_numValues << "];\n"
1148                         << "} ub_in;\n"
1149                         << "layout(binding = 1, std140) writeonly buffer Output {\n"
1150                         << "    uint values[" << m_numValues << "];\n"
1151                         << "} sb_out;\n"
1152                         << "void main (void) {\n"
1153                         << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1154                         << "    uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
1155                         << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1156                         << "    uint offset          = numValuesPerInv*groupNdx;\n"
1157                         << "\n"
1158                         << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1159                         << "        sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
1160                         << "}\n";
1161         }
1162         else if (m_bufferType == BUFFER_TYPE_SSBO)
1163         {
1164                 src << "#version 310 es\n"
1165                         << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1166                         << "layout(binding = 0, std140) readonly buffer Input {\n"
1167                         << "    uint values[" << m_numValues << "];\n"
1168                         << "} sb_in;\n"
1169                         << "layout (binding = 1, std140) writeonly buffer Output {\n"
1170                         << "    uint values[" << m_numValues << "];\n"
1171                         << "} sb_out;\n"
1172                         << "void main (void) {\n"
1173                         << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1174                         << "    uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
1175                         << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1176                         << "    uint offset          = numValuesPerInv*groupNdx;\n"
1177                         << "\n"
1178                         << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1179                         << "        sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
1180                         << "}\n";
1181         }
1182
1183         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1184 }
1185
1186 TestInstance* BufferToBufferInvertTest::createInstance (Context& context) const
1187 {
1188         return new BufferToBufferInvertTestInstance(context, m_numValues, m_localSize, m_workSize, m_bufferType);
1189 }
1190
1191 BufferToBufferInvertTestInstance::BufferToBufferInvertTestInstance (Context&                    context,
1192                                                                                                                                         const deUint32          numValues,
1193                                                                                                                                         const tcu::IVec3&       localSize,
1194                                                                                                                                         const tcu::IVec3&       workSize,
1195                                                                                                                                         const BufferType        bufferType)
1196         : TestInstance  (context)
1197         , m_bufferType  (bufferType)
1198         , m_numValues   (numValues)
1199         , m_localSize   (localSize)
1200         , m_workSize    (workSize)
1201 {
1202 }
1203
1204 tcu::TestStatus BufferToBufferInvertTestInstance::iterate (void)
1205 {
1206         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1207         const VkDevice                  device                          = m_context.getDevice();
1208         const VkQueue                   queue                           = m_context.getUniversalQueue();
1209         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1210         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1211
1212         // Customize the test based on buffer type
1213
1214         const VkBufferUsageFlags inputBufferUsageFlags          = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1215         const VkDescriptorType inputBufferDescriptorType        = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
1216         const deUint32 randomSeed                                                       = (m_bufferType == BUFFER_TYPE_UNIFORM ? 0x111223f : 0x124fef);
1217
1218         // Create an input buffer
1219
1220         const VkDeviceSize bufferSizeBytes = sizeof(tcu::UVec4) * m_numValues;
1221         const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, inputBufferUsageFlags), MemoryRequirement::HostVisible);
1222
1223         // Fill the input buffer with data
1224         {
1225                 de::Random rnd(randomSeed);
1226                 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
1227                 tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(inputBufferAllocation.getHostPtr());
1228                 for (deUint32 i = 0; i < m_numValues; ++i)
1229                         bufferPtr[i].x() = rnd.getUint32();
1230
1231                 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
1232         }
1233
1234         // Create an output buffer
1235
1236         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1237
1238         // Create descriptor set
1239
1240         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1241                 DescriptorSetLayoutBuilder()
1242                 .addSingleBinding(inputBufferDescriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1243                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1244                 .build(vk, device));
1245
1246         const Unique<VkDescriptorPool> descriptorPool(
1247                 DescriptorPoolBuilder()
1248                 .addType(inputBufferDescriptorType)
1249                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1250                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1251
1252         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1253
1254         const VkDescriptorBufferInfo inputBufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
1255         const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
1256         DescriptorSetUpdateBuilder()
1257                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), inputBufferDescriptorType, &inputBufferDescriptorInfo)
1258                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1259                 .update(vk, device);
1260
1261         // Perform the computation
1262
1263         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1264         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1265         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1266
1267         const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
1268
1269         const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1270
1271         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1272         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1273
1274         // Start recording commands
1275
1276         beginCommandBuffer(vk, *cmdBuffer);
1277
1278         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1279         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1280
1281         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1282         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1283         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1284
1285         endCommandBuffer(vk, *cmdBuffer);
1286
1287         // Wait for completion
1288
1289         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1290
1291         // Validate the results
1292
1293         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1294         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1295
1296         const tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(outputBufferAllocation.getHostPtr());
1297         const tcu::UVec4* refBufferPtr = static_cast<tcu::UVec4*>(inputBuffer.getAllocation().getHostPtr());
1298
1299         for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1300         {
1301                 const deUint32 res = bufferPtr[ndx].x();
1302                 const deUint32 ref = ~refBufferPtr[ndx].x();
1303
1304                 if (res != ref)
1305                 {
1306                         std::ostringstream msg;
1307                         msg << "Comparison failed for Output.values[" << ndx << "]";
1308                         return tcu::TestStatus::fail(msg.str());
1309                 }
1310         }
1311         return tcu::TestStatus::pass("Compute succeeded");
1312 }
1313
1314 class InvertSSBOInPlaceTest : public vkt::TestCase
1315 {
1316 public:
1317                                                 InvertSSBOInPlaceTest   (tcu::TestContext&      testCtx,
1318                                                                                                  const std::string&     name,
1319                                                                                                  const std::string&     description,
1320                                                                                                  const deUint32         numValues,
1321                                                                                                  const bool                     sized,
1322                                                                                                  const tcu::IVec3&      localSize,
1323                                                                                                  const tcu::IVec3&      workSize);
1324
1325
1326         void                            initPrograms                    (SourceCollections& sourceCollections) const;
1327         TestInstance*           createInstance                  (Context&                       context) const;
1328
1329 private:
1330         const deUint32          m_numValues;
1331         const bool                      m_sized;
1332         const tcu::IVec3        m_localSize;
1333         const tcu::IVec3        m_workSize;
1334 };
1335
1336 class InvertSSBOInPlaceTestInstance : public vkt::TestInstance
1337 {
1338 public:
1339                                                                         InvertSSBOInPlaceTestInstance   (Context&                       context,
1340                                                                                                                                          const deUint32         numValues,
1341                                                                                                                                          const tcu::IVec3&      localSize,
1342                                                                                                                                          const tcu::IVec3&      workSize);
1343
1344         tcu::TestStatus                                 iterate                                                 (void);
1345
1346 private:
1347         const deUint32                                  m_numValues;
1348         const tcu::IVec3                                m_localSize;
1349         const tcu::IVec3                                m_workSize;
1350 };
1351
1352 InvertSSBOInPlaceTest::InvertSSBOInPlaceTest (tcu::TestContext&         testCtx,
1353                                                                                           const std::string&    name,
1354                                                                                           const std::string&    description,
1355                                                                                           const deUint32                numValues,
1356                                                                                           const bool                    sized,
1357                                                                                           const tcu::IVec3&             localSize,
1358                                                                                           const tcu::IVec3&             workSize)
1359         : TestCase              (testCtx, name, description)
1360         , m_numValues   (numValues)
1361         , m_sized               (sized)
1362         , m_localSize   (localSize)
1363         , m_workSize    (workSize)
1364 {
1365         DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1366 }
1367
1368 void InvertSSBOInPlaceTest::initPrograms (SourceCollections& sourceCollections) const
1369 {
1370         std::ostringstream src;
1371         src << "#version 310 es\n"
1372                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1373                 << "layout(binding = 0) buffer InOut {\n"
1374                 << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1375                 << "} sb_inout;\n"
1376                 << "void main (void) {\n"
1377                 << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1378                 << "    uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
1379                 << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1380                 << "    uint offset          = numValuesPerInv*groupNdx;\n"
1381                 << "\n"
1382                 << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1383                 << "        sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
1384                 << "}\n";
1385
1386         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1387 }
1388
1389 TestInstance* InvertSSBOInPlaceTest::createInstance (Context& context) const
1390 {
1391         return new InvertSSBOInPlaceTestInstance(context, m_numValues, m_localSize, m_workSize);
1392 }
1393
1394 InvertSSBOInPlaceTestInstance::InvertSSBOInPlaceTestInstance (Context&                  context,
1395                                                                                                                           const deUint32        numValues,
1396                                                                                                                           const tcu::IVec3&     localSize,
1397                                                                                                                           const tcu::IVec3&     workSize)
1398         : TestInstance  (context)
1399         , m_numValues   (numValues)
1400         , m_localSize   (localSize)
1401         , m_workSize    (workSize)
1402 {
1403 }
1404
1405 tcu::TestStatus InvertSSBOInPlaceTestInstance::iterate (void)
1406 {
1407         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1408         const VkDevice                  device                          = m_context.getDevice();
1409         const VkQueue                   queue                           = m_context.getUniversalQueue();
1410         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1411         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1412
1413         // Create an input/output buffer
1414
1415         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1416         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1417
1418         // Fill the buffer with data
1419
1420         typedef std::vector<deUint32> data_vector_t;
1421         data_vector_t inputData(m_numValues);
1422
1423         {
1424                 de::Random rnd(0x82ce7f);
1425                 const Allocation& bufferAllocation = buffer.getAllocation();
1426                 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1427                 for (deUint32 i = 0; i < m_numValues; ++i)
1428                         inputData[i] = *bufferPtr++ = rnd.getUint32();
1429
1430                 flushMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1431         }
1432
1433         // Create descriptor set
1434
1435         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1436                 DescriptorSetLayoutBuilder()
1437                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1438                 .build(vk, device));
1439
1440         const Unique<VkDescriptorPool> descriptorPool(
1441                 DescriptorPoolBuilder()
1442                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1443                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1444
1445         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1446
1447         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
1448         DescriptorSetUpdateBuilder()
1449                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
1450                 .update(vk, device);
1451
1452         // Perform the computation
1453
1454         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1455         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1456         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1457
1458         const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1459
1460         const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1461
1462         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1463         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1464
1465         // Start recording commands
1466
1467         beginCommandBuffer(vk, *cmdBuffer);
1468
1469         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1470         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1471
1472         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1473         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1474         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1475
1476         endCommandBuffer(vk, *cmdBuffer);
1477
1478         // Wait for completion
1479
1480         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1481
1482         // Validate the results
1483
1484         const Allocation& bufferAllocation = buffer.getAllocation();
1485         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1486
1487         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1488
1489         for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1490         {
1491                 const deUint32 res = bufferPtr[ndx];
1492                 const deUint32 ref = ~inputData[ndx];
1493
1494                 if (res != ref)
1495                 {
1496                         std::ostringstream msg;
1497                         msg << "Comparison failed for InOut.values[" << ndx << "]";
1498                         return tcu::TestStatus::fail(msg.str());
1499                 }
1500         }
1501         return tcu::TestStatus::pass("Compute succeeded");
1502 }
1503
1504 class WriteToMultipleSSBOTest : public vkt::TestCase
1505 {
1506 public:
1507                                                 WriteToMultipleSSBOTest (tcu::TestContext&      testCtx,
1508                                                                                                  const std::string&     name,
1509                                                                                                  const std::string&     description,
1510                                                                                                  const deUint32         numValues,
1511                                                                                                  const bool                     sized,
1512                                                                                                  const tcu::IVec3&      localSize,
1513                                                                                                  const tcu::IVec3&      workSize);
1514
1515         void                            initPrograms                    (SourceCollections& sourceCollections) const;
1516         TestInstance*           createInstance                  (Context&                       context) const;
1517
1518 private:
1519         const deUint32          m_numValues;
1520         const bool                      m_sized;
1521         const tcu::IVec3        m_localSize;
1522         const tcu::IVec3        m_workSize;
1523 };
1524
1525 class WriteToMultipleSSBOTestInstance : public vkt::TestInstance
1526 {
1527 public:
1528                                                                         WriteToMultipleSSBOTestInstance (Context&                       context,
1529                                                                                                                                          const deUint32         numValues,
1530                                                                                                                                          const tcu::IVec3&      localSize,
1531                                                                                                                                          const tcu::IVec3&      workSize);
1532
1533         tcu::TestStatus                                 iterate                                                 (void);
1534
1535 private:
1536         const deUint32                                  m_numValues;
1537         const tcu::IVec3                                m_localSize;
1538         const tcu::IVec3                                m_workSize;
1539 };
1540
1541 WriteToMultipleSSBOTest::WriteToMultipleSSBOTest (tcu::TestContext&             testCtx,
1542                                                                                                   const std::string&    name,
1543                                                                                                   const std::string&    description,
1544                                                                                                   const deUint32                numValues,
1545                                                                                                   const bool                    sized,
1546                                                                                                   const tcu::IVec3&             localSize,
1547                                                                                                   const tcu::IVec3&             workSize)
1548         : TestCase              (testCtx, name, description)
1549         , m_numValues   (numValues)
1550         , m_sized               (sized)
1551         , m_localSize   (localSize)
1552         , m_workSize    (workSize)
1553 {
1554         DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1555 }
1556
1557 void WriteToMultipleSSBOTest::initPrograms (SourceCollections& sourceCollections) const
1558 {
1559         std::ostringstream src;
1560         src << "#version 310 es\n"
1561                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1562                 << "layout(binding = 0) writeonly buffer Out0 {\n"
1563                 << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1564                 << "} sb_out0;\n"
1565                 << "layout(binding = 1) writeonly buffer Out1 {\n"
1566                 << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1567                 << "} sb_out1;\n"
1568                 << "void main (void) {\n"
1569                 << "    uvec3 size      = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1570                 << "    uint groupNdx   = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1571                 << "\n"
1572                 << "    {\n"
1573                 << "        uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
1574                 << "        uint offset          = numValuesPerInv*groupNdx;\n"
1575                 << "\n"
1576                 << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1577                 << "            sb_out0.values[offset + ndx] = offset + ndx;\n"
1578                 << "    }\n"
1579                 << "    {\n"
1580                 << "        uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
1581                 << "        uint offset          = numValuesPerInv*groupNdx;\n"
1582                 << "\n"
1583                 << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1584                 << "            sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
1585                 << "    }\n"
1586                 << "}\n";
1587
1588         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1589 }
1590
1591 TestInstance* WriteToMultipleSSBOTest::createInstance (Context& context) const
1592 {
1593         return new WriteToMultipleSSBOTestInstance(context, m_numValues, m_localSize, m_workSize);
1594 }
1595
1596 WriteToMultipleSSBOTestInstance::WriteToMultipleSSBOTestInstance (Context&                      context,
1597                                                                                                                                   const deUint32        numValues,
1598                                                                                                                                   const tcu::IVec3&     localSize,
1599                                                                                                                                   const tcu::IVec3&     workSize)
1600         : TestInstance  (context)
1601         , m_numValues   (numValues)
1602         , m_localSize   (localSize)
1603         , m_workSize    (workSize)
1604 {
1605 }
1606
1607 tcu::TestStatus WriteToMultipleSSBOTestInstance::iterate (void)
1608 {
1609         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1610         const VkDevice                  device                          = m_context.getDevice();
1611         const VkQueue                   queue                           = m_context.getUniversalQueue();
1612         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1613         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1614
1615         // Create two output buffers
1616
1617         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1618         const Buffer buffer0(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1619         const Buffer buffer1(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1620
1621         // Create descriptor set
1622
1623         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1624                 DescriptorSetLayoutBuilder()
1625                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1626                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1627                 .build(vk, device));
1628
1629         const Unique<VkDescriptorPool> descriptorPool(
1630                 DescriptorPoolBuilder()
1631                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1632                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1633
1634         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1635
1636         const VkDescriptorBufferInfo buffer0DescriptorInfo = makeDescriptorBufferInfo(*buffer0, 0ull, bufferSizeBytes);
1637         const VkDescriptorBufferInfo buffer1DescriptorInfo = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
1638         DescriptorSetUpdateBuilder()
1639                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer0DescriptorInfo)
1640                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer1DescriptorInfo)
1641                 .update(vk, device);
1642
1643         // Perform the computation
1644
1645         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1646         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1647         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1648
1649         const VkBufferMemoryBarrier shaderWriteBarriers[] =
1650         {
1651                 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer0, 0ull, bufferSizeBytes),
1652                 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes)
1653         };
1654
1655         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1656         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1657
1658         // Start recording commands
1659
1660         beginCommandBuffer(vk, *cmdBuffer);
1661
1662         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1663         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1664
1665         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1666         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, DE_LENGTH_OF_ARRAY(shaderWriteBarriers), shaderWriteBarriers, 0, (const VkImageMemoryBarrier*)DE_NULL);
1667
1668         endCommandBuffer(vk, *cmdBuffer);
1669
1670         // Wait for completion
1671
1672         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1673
1674         // Validate the results
1675         {
1676                 const Allocation& buffer0Allocation = buffer0.getAllocation();
1677                 invalidateMappedMemoryRange(vk, device, buffer0Allocation.getMemory(), buffer0Allocation.getOffset(), bufferSizeBytes);
1678                 const deUint32* buffer0Ptr = static_cast<deUint32*>(buffer0Allocation.getHostPtr());
1679
1680                 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1681                 {
1682                         const deUint32 res = buffer0Ptr[ndx];
1683                         const deUint32 ref = ndx;
1684
1685                         if (res != ref)
1686                         {
1687                                 std::ostringstream msg;
1688                                 msg << "Comparison failed for Out0.values[" << ndx << "] res=" << res << " ref=" << ref;
1689                                 return tcu::TestStatus::fail(msg.str());
1690                         }
1691                 }
1692         }
1693         {
1694                 const Allocation& buffer1Allocation = buffer1.getAllocation();
1695                 invalidateMappedMemoryRange(vk, device, buffer1Allocation.getMemory(), buffer1Allocation.getOffset(), bufferSizeBytes);
1696                 const deUint32* buffer1Ptr = static_cast<deUint32*>(buffer1Allocation.getHostPtr());
1697
1698                 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1699                 {
1700                         const deUint32 res = buffer1Ptr[ndx];
1701                         const deUint32 ref = m_numValues - ndx;
1702
1703                         if (res != ref)
1704                         {
1705                                 std::ostringstream msg;
1706                                 msg << "Comparison failed for Out1.values[" << ndx << "] res=" << res << " ref=" << ref;
1707                                 return tcu::TestStatus::fail(msg.str());
1708                         }
1709                 }
1710         }
1711         return tcu::TestStatus::pass("Compute succeeded");
1712 }
1713
1714 class SSBOBarrierTest : public vkt::TestCase
1715 {
1716 public:
1717                                                 SSBOBarrierTest         (tcu::TestContext&      testCtx,
1718                                                                                          const std::string&     name,
1719                                                                                          const std::string&     description,
1720                                                                                          const tcu::IVec3&      workSize);
1721
1722         void                            initPrograms            (SourceCollections& sourceCollections) const;
1723         TestInstance*           createInstance          (Context&                       context) const;
1724
1725 private:
1726         const tcu::IVec3        m_workSize;
1727 };
1728
1729 class SSBOBarrierTestInstance : public vkt::TestInstance
1730 {
1731 public:
1732                                                                         SSBOBarrierTestInstance         (Context&                       context,
1733                                                                                                                                  const tcu::IVec3&      workSize);
1734
1735         tcu::TestStatus                                 iterate                                         (void);
1736
1737 private:
1738         const tcu::IVec3                                m_workSize;
1739 };
1740
1741 SSBOBarrierTest::SSBOBarrierTest (tcu::TestContext&             testCtx,
1742                                                                   const std::string&    name,
1743                                                                   const std::string&    description,
1744                                                                   const tcu::IVec3&             workSize)
1745         : TestCase              (testCtx, name, description)
1746         , m_workSize    (workSize)
1747 {
1748 }
1749
1750 void SSBOBarrierTest::initPrograms (SourceCollections& sourceCollections) const
1751 {
1752         sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
1753                 "#version 310 es\n"
1754                 "layout (local_size_x = 1) in;\n"
1755                 "layout(binding = 2) readonly uniform Constants {\n"
1756                 "    uint u_baseVal;\n"
1757                 "};\n"
1758                 "layout(binding = 1) writeonly buffer Output {\n"
1759                 "    uint values[];\n"
1760                 "};\n"
1761                 "void main (void) {\n"
1762                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1763                 "    values[offset] = u_baseVal + offset;\n"
1764                 "}\n");
1765
1766         sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
1767                 "#version 310 es\n"
1768                 "layout (local_size_x = 1) in;\n"
1769                 "layout(binding = 1) readonly buffer Input {\n"
1770                 "    uint values[];\n"
1771                 "};\n"
1772                 "layout(binding = 0) coherent buffer Output {\n"
1773                 "    uint sum;\n"
1774                 "};\n"
1775                 "void main (void) {\n"
1776                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1777                 "    uint value  = values[offset];\n"
1778                 "    atomicAdd(sum, value);\n"
1779                 "}\n");
1780 }
1781
1782 TestInstance* SSBOBarrierTest::createInstance (Context& context) const
1783 {
1784         return new SSBOBarrierTestInstance(context, m_workSize);
1785 }
1786
1787 SSBOBarrierTestInstance::SSBOBarrierTestInstance (Context& context, const tcu::IVec3& workSize)
1788         : TestInstance  (context)
1789         , m_workSize    (workSize)
1790 {
1791 }
1792
1793 tcu::TestStatus SSBOBarrierTestInstance::iterate (void)
1794 {
1795         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1796         const VkDevice                  device                          = m_context.getDevice();
1797         const VkQueue                   queue                           = m_context.getUniversalQueue();
1798         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1799         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1800
1801         // Create a work buffer used by both shaders
1802
1803         const int workGroupCount = multiplyComponents(m_workSize);
1804         const VkDeviceSize workBufferSizeBytes = sizeof(deUint32) * workGroupCount;
1805         const Buffer workBuffer(vk, device, allocator, makeBufferCreateInfo(workBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::Any);
1806
1807         // Create an output buffer
1808
1809         const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
1810         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1811
1812         // Initialize atomic counter value to zero
1813         {
1814                 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1815                 deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1816                 *outputBufferPtr = 0;
1817                 flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1818         }
1819
1820         // Create a uniform buffer (to pass uniform constants)
1821
1822         const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
1823         const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
1824
1825         // Set the constants in the uniform buffer
1826
1827         const deUint32  baseValue = 127;
1828         {
1829                 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
1830                 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
1831                 uniformBufferPtr[0] = baseValue;
1832
1833                 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
1834         }
1835
1836         // Create descriptor set
1837
1838         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1839                 DescriptorSetLayoutBuilder()
1840                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1841                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1842                 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1843                 .build(vk, device));
1844
1845         const Unique<VkDescriptorPool> descriptorPool(
1846                 DescriptorPoolBuilder()
1847                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1848                 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1849                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1850
1851         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1852
1853         const VkDescriptorBufferInfo workBufferDescriptorInfo = makeDescriptorBufferInfo(*workBuffer, 0ull, workBufferSizeBytes);
1854         const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
1855         const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
1856         DescriptorSetUpdateBuilder()
1857                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1858                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &workBufferDescriptorInfo)
1859                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
1860                 .update(vk, device);
1861
1862         // Perform the computation
1863
1864         const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
1865         const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
1866
1867         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1868         const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
1869         const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
1870
1871         const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
1872
1873         const VkBufferMemoryBarrier betweenShadersBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *workBuffer, 0ull, workBufferSizeBytes);
1874
1875         const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
1876
1877         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1878         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1879
1880         // Start recording commands
1881
1882         beginCommandBuffer(vk, *cmdBuffer);
1883
1884         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
1885         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1886
1887         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1888
1889         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1890         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &betweenShadersBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1891
1892         // Switch to the second shader program
1893         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
1894
1895         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1896         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1897
1898         endCommandBuffer(vk, *cmdBuffer);
1899
1900         // Wait for completion
1901
1902         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1903
1904         // Validate the results
1905
1906         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1907         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1908
1909         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1910         const deUint32  res = *bufferPtr;
1911         deUint32                ref = 0;
1912
1913         for (int ndx = 0; ndx < workGroupCount; ++ndx)
1914                 ref += baseValue + ndx;
1915
1916         if (res != ref)
1917         {
1918                 std::ostringstream msg;
1919                 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
1920                 return tcu::TestStatus::fail(msg.str());
1921         }
1922         return tcu::TestStatus::pass("Compute succeeded");
1923 }
1924
1925 class ImageAtomicOpTest : public vkt::TestCase
1926 {
1927 public:
1928                                                 ImageAtomicOpTest               (tcu::TestContext&      testCtx,
1929                                                                                                  const std::string& name,
1930                                                                                                  const std::string& description,
1931                                                                                                  const deUint32         localSize,
1932                                                                                                  const tcu::IVec2&      imageSize);
1933
1934         void                            initPrograms                    (SourceCollections& sourceCollections) const;
1935         TestInstance*           createInstance                  (Context&                       context) const;
1936
1937 private:
1938         const deUint32          m_localSize;
1939         const tcu::IVec2        m_imageSize;
1940 };
1941
1942 class ImageAtomicOpTestInstance : public vkt::TestInstance
1943 {
1944 public:
1945                                                                         ImageAtomicOpTestInstance               (Context&                       context,
1946                                                                                                                                          const deUint32         localSize,
1947                                                                                                                                          const tcu::IVec2&      imageSize);
1948
1949         tcu::TestStatus                                 iterate                                                 (void);
1950
1951 private:
1952         const deUint32                                  m_localSize;
1953         const tcu::IVec2                                m_imageSize;
1954 };
1955
1956 ImageAtomicOpTest::ImageAtomicOpTest (tcu::TestContext&         testCtx,
1957                                                                           const std::string&    name,
1958                                                                           const std::string&    description,
1959                                                                           const deUint32                localSize,
1960                                                                           const tcu::IVec2&             imageSize)
1961         : TestCase              (testCtx, name, description)
1962         , m_localSize   (localSize)
1963         , m_imageSize   (imageSize)
1964 {
1965 }
1966
1967 void ImageAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
1968 {
1969         std::ostringstream src;
1970         src << "#version 310 es\n"
1971                 << "#extension GL_OES_shader_image_atomic : require\n"
1972                 << "layout (local_size_x = " << m_localSize << ") in;\n"
1973                 << "layout(binding = 1, r32ui) coherent uniform highp uimage2D u_dstImg;\n"
1974                 << "layout(binding = 0) readonly buffer Input {\n"
1975                 << "    uint values[" << (multiplyComponents(m_imageSize) * m_localSize) << "];\n"
1976                 << "} sb_in;\n\n"
1977                 << "void main (void) {\n"
1978                 << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1979                 << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1980                 << "\n"
1981                 << "    if (gl_LocalInvocationIndex == 0u)\n"
1982                 << "        imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1983                 << "    memoryBarrierImage();\n"
1984                 << "    barrier();\n"
1985                 << "    imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
1986                 << "}\n";
1987
1988         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1989 }
1990
1991 TestInstance* ImageAtomicOpTest::createInstance (Context& context) const
1992 {
1993         return new ImageAtomicOpTestInstance(context, m_localSize, m_imageSize);
1994 }
1995
1996 ImageAtomicOpTestInstance::ImageAtomicOpTestInstance (Context& context, const deUint32 localSize, const tcu::IVec2& imageSize)
1997         : TestInstance  (context)
1998         , m_localSize   (localSize)
1999         , m_imageSize   (imageSize)
2000 {
2001 }
2002
2003 tcu::TestStatus ImageAtomicOpTestInstance::iterate (void)
2004 {
2005         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
2006         const VkDevice                  device                          = m_context.getDevice();
2007         const VkQueue                   queue                           = m_context.getUniversalQueue();
2008         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
2009         Allocator&                              allocator                       = m_context.getDefaultAllocator();
2010
2011         // Create an image
2012
2013         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
2014         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2015
2016         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2017         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2018
2019         // Input buffer
2020
2021         const deUint32 numInputValues = multiplyComponents(m_imageSize) * m_localSize;
2022         const VkDeviceSize inputBufferSizeBytes = sizeof(deUint32) * numInputValues;
2023
2024         const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(inputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2025
2026         // Populate the input buffer with test data
2027         {
2028                 de::Random rnd(0x77238ac2);
2029                 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
2030                 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
2031                 for (deUint32 i = 0; i < numInputValues; ++i)
2032                         *bufferPtr++ = rnd.getUint32();
2033
2034                 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), inputBufferSizeBytes);
2035         }
2036
2037         // Create a buffer to store shader output (copied from image data)
2038
2039         const deUint32 imageArea = multiplyComponents(m_imageSize);
2040         const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32) * imageArea;
2041         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
2042
2043         // Create descriptor set
2044
2045         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2046                 DescriptorSetLayoutBuilder()
2047                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2048                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2049                 .build(vk, device));
2050
2051         const Unique<VkDescriptorPool> descriptorPool(
2052                 DescriptorPoolBuilder()
2053                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2054                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2055                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2056
2057         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2058
2059         // Set the bindings
2060
2061         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2062         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, inputBufferSizeBytes);
2063
2064         DescriptorSetUpdateBuilder()
2065                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2066                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2067                 .update(vk, device);
2068
2069         // Perform the computation
2070         {
2071                 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
2072                 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2073                 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2074
2075                 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, inputBufferSizeBytes);
2076
2077                 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
2078                         VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
2079                         VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2080                         *image, subresourceRange);
2081
2082                 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2083
2084                 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
2085
2086                 // Prepare the command buffer
2087
2088                 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2089                 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2090
2091                 // Start recording commands
2092
2093                 beginCommandBuffer(vk, *cmdBuffer);
2094
2095                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2096                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2097
2098                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2099                 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2100
2101                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
2102                 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &copyParams);
2103                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2104
2105                 endCommandBuffer(vk, *cmdBuffer);
2106
2107                 // Wait for completion
2108
2109                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2110         }
2111
2112         // Validate the results
2113
2114         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2115         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2116
2117         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2118         const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
2119
2120         for (deUint32 pixelNdx = 0; pixelNdx < imageArea; ++pixelNdx)
2121         {
2122                 const deUint32  res = bufferPtr[pixelNdx];
2123                 deUint32                ref = 0;
2124
2125                 for (deUint32 offs = 0; offs < m_localSize; ++offs)
2126                         ref += refBufferPtr[pixelNdx * m_localSize + offs];
2127
2128                 if (res != ref)
2129                 {
2130                         std::ostringstream msg;
2131                         msg << "Comparison failed for pixel " << pixelNdx;
2132                         return tcu::TestStatus::fail(msg.str());
2133                 }
2134         }
2135         return tcu::TestStatus::pass("Compute succeeded");
2136 }
2137
2138 class ImageBarrierTest : public vkt::TestCase
2139 {
2140 public:
2141                                                 ImageBarrierTest        (tcu::TestContext&      testCtx,
2142                                                                                         const std::string&      name,
2143                                                                                         const std::string&      description,
2144                                                                                         const tcu::IVec2&       imageSize);
2145
2146         void                            initPrograms            (SourceCollections& sourceCollections) const;
2147         TestInstance*           createInstance          (Context&                       context) const;
2148
2149 private:
2150         const tcu::IVec2        m_imageSize;
2151 };
2152
2153 class ImageBarrierTestInstance : public vkt::TestInstance
2154 {
2155 public:
2156                                                                         ImageBarrierTestInstance        (Context&                       context,
2157                                                                                                                                  const tcu::IVec2&      imageSize);
2158
2159         tcu::TestStatus                                 iterate                                         (void);
2160
2161 private:
2162         const tcu::IVec2                                m_imageSize;
2163 };
2164
2165 ImageBarrierTest::ImageBarrierTest (tcu::TestContext&   testCtx,
2166                                                                         const std::string&      name,
2167                                                                         const std::string&      description,
2168                                                                         const tcu::IVec2&       imageSize)
2169         : TestCase              (testCtx, name, description)
2170         , m_imageSize   (imageSize)
2171 {
2172 }
2173
2174 void ImageBarrierTest::initPrograms (SourceCollections& sourceCollections) const
2175 {
2176         sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
2177                 "#version 310 es\n"
2178                 "layout (local_size_x = 1) in;\n"
2179                 "layout(binding = 2) readonly uniform Constants {\n"
2180                 "    uint u_baseVal;\n"
2181                 "};\n"
2182                 "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_img;\n"
2183                 "void main (void) {\n"
2184                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
2185                 "    imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset + u_baseVal, 0, 0, 0));\n"
2186                 "}\n");
2187
2188         sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
2189                 "#version 310 es\n"
2190                 "layout (local_size_x = 1) in;\n"
2191                 "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_img;\n"
2192                 "layout(binding = 0) coherent buffer Output {\n"
2193                 "    uint sum;\n"
2194                 "};\n"
2195                 "void main (void) {\n"
2196                 "    uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
2197                 "    atomicAdd(sum, value);\n"
2198                 "}\n");
2199 }
2200
2201 TestInstance* ImageBarrierTest::createInstance (Context& context) const
2202 {
2203         return new ImageBarrierTestInstance(context, m_imageSize);
2204 }
2205
2206 ImageBarrierTestInstance::ImageBarrierTestInstance (Context& context, const tcu::IVec2& imageSize)
2207         : TestInstance  (context)
2208         , m_imageSize   (imageSize)
2209 {
2210 }
2211
2212 tcu::TestStatus ImageBarrierTestInstance::iterate (void)
2213 {
2214         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
2215         const VkDevice                  device                          = m_context.getDevice();
2216         const VkQueue                   queue                           = m_context.getUniversalQueue();
2217         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
2218         Allocator&                              allocator                       = m_context.getDefaultAllocator();
2219
2220         // Create an image used by both shaders
2221
2222         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_STORAGE_BIT);
2223         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2224
2225         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2226         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2227
2228         // Create an output buffer
2229
2230         const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
2231         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2232
2233         // Initialize atomic counter value to zero
2234         {
2235                 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2236                 deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2237                 *outputBufferPtr = 0;
2238                 flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2239         }
2240
2241         // Create a uniform buffer (to pass uniform constants)
2242
2243         const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
2244         const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
2245
2246         // Set the constants in the uniform buffer
2247
2248         const deUint32  baseValue = 127;
2249         {
2250                 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
2251                 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
2252                 uniformBufferPtr[0] = baseValue;
2253
2254                 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
2255         }
2256
2257         // Create descriptor set
2258
2259         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2260                 DescriptorSetLayoutBuilder()
2261                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2262                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2263                 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2264                 .build(vk, device));
2265
2266         const Unique<VkDescriptorPool> descriptorPool(
2267                 DescriptorPoolBuilder()
2268                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2269                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2270                 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2271                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2272
2273         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2274
2275         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2276         const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
2277         const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2278         DescriptorSetUpdateBuilder()
2279                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
2280                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2281                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2282                 .update(vk, device);
2283
2284         // Perform the computation
2285
2286         const Unique<VkShaderModule>    shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
2287         const Unique<VkShaderModule>    shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
2288
2289         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2290         const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
2291         const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
2292
2293         const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
2294
2295         const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2296                 0u, 0u,
2297                 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
2298                 *image, subresourceRange);
2299
2300         const VkImageMemoryBarrier imageBarrierBetweenShaders = makeImageMemoryBarrier(
2301                 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
2302                 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2303                 *image, subresourceRange);
2304
2305         const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2306
2307         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2308         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2309
2310         // Start recording commands
2311
2312         beginCommandBuffer(vk, *cmdBuffer);
2313
2314         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
2315         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2316
2317         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 1, &imageLayoutBarrier);
2318
2319         vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2320         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imageBarrierBetweenShaders);
2321
2322         // Switch to the second shader program
2323         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
2324
2325         vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2326         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2327
2328         endCommandBuffer(vk, *cmdBuffer);
2329
2330         // Wait for completion
2331
2332         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2333
2334         // Validate the results
2335
2336         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2337         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2338
2339         const int               numValues = multiplyComponents(m_imageSize);
2340         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2341         const deUint32  res = *bufferPtr;
2342         deUint32                ref = 0;
2343
2344         for (int ndx = 0; ndx < numValues; ++ndx)
2345                 ref += baseValue + ndx;
2346
2347         if (res != ref)
2348         {
2349                 std::ostringstream msg;
2350                 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
2351                 return tcu::TestStatus::fail(msg.str());
2352         }
2353         return tcu::TestStatus::pass("Compute succeeded");
2354 }
2355
2356 namespace EmptyShaderTest
2357 {
2358
2359 void createProgram (SourceCollections& dst)
2360 {
2361         dst.glslSources.add("comp") << glu::ComputeSource(
2362                 "#version 310 es\n"
2363                 "layout (local_size_x = 1) in;\n"
2364                 "void main (void) {}\n"
2365         );
2366 }
2367
2368 tcu::TestStatus createTest (Context& context)
2369 {
2370         const DeviceInterface&  vk                                      = context.getDeviceInterface();
2371         const VkDevice                  device                          = context.getDevice();
2372         const VkQueue                   queue                           = context.getUniversalQueue();
2373         const deUint32                  queueFamilyIndex        = context.getUniversalQueueFamilyIndex();
2374
2375         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0u));
2376
2377         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device));
2378         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2379
2380         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2381         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2382
2383         // Start recording commands
2384
2385         beginCommandBuffer(vk, *cmdBuffer);
2386
2387         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2388
2389         const tcu::IVec3 workGroups(1, 1, 1);
2390         vk.cmdDispatch(*cmdBuffer, workGroups.x(), workGroups.y(), workGroups.z());
2391
2392         endCommandBuffer(vk, *cmdBuffer);
2393
2394         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2395
2396         return tcu::TestStatus::pass("Compute succeeded");
2397 }
2398
2399 } // EmptyShaderTest ns
2400 } // anonymous
2401
2402 tcu::TestCaseGroup* createBasicComputeShaderTests (tcu::TestContext& testCtx)
2403 {
2404         de::MovePtr<tcu::TestCaseGroup> basicComputeTests(new tcu::TestCaseGroup(testCtx, "basic", "Basic compute tests"));
2405
2406         addFunctionCaseWithPrograms(basicComputeTests.get(), "empty_shader", "Shader that does nothing", EmptyShaderTest::createProgram, EmptyShaderTest::createTest);
2407
2408         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_single_invocation",        "Copy from UBO to SSBO, inverting bits",        256,    tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2409         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_single_group",                     "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(2,1,4),      tcu::IVec3(1,1,1)));
2410         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_multiple_invocations",     "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(1,1,1),      tcu::IVec3(2,4,1)));
2411         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_multiple_groups",          "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2412
2413         basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,       "copy_ssbo_single_invocation",          "Copy between SSBOs, inverting bits",   256,    tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2414         basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,       "copy_ssbo_multiple_invocations",       "Copy between SSBOs, inverting bits",   1024,   tcu::IVec3(1,1,1),      tcu::IVec3(2,4,1)));
2415         basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,       "copy_ssbo_multiple_groups",            "Copy between SSBOs, inverting bits",   1024,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2416
2417         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_rw_single_invocation",                    "Read and write same SSBO",             256,    true,   tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2418         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_rw_multiple_groups",                              "Read and write same SSBO",             1024,   true,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2419         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_unsized_arr_single_invocation",   "Read and write same SSBO",             256,    false,  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2420         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_unsized_arr_multiple_groups",             "Read and write same SSBO",             1024,   false,  tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2421
2422         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_arr_single_invocation",                 "Write to multiple SSBOs",      256,    true,   tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2423         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_arr_multiple_groups",                   "Write to multiple SSBOs",      1024,   true,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2424         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_unsized_arr_single_invocation", "Write to multiple SSBOs",      256,    false,  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2425         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_unsized_arr_multiple_groups",   "Write to multiple SSBOs",      1024,   false,  tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2426
2427         basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,   "ssbo_local_barrier_single_invocation", "SSBO local barrier usage",     tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2428         basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,   "ssbo_local_barrier_single_group",              "SSBO local barrier usage",     tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
2429         basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,   "ssbo_local_barrier_multiple_groups",   "SSBO local barrier usage",     tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
2430
2431         basicComputeTests->addChild(new SSBOBarrierTest(testCtx,        "ssbo_cmd_barrier_single",              "SSBO memory barrier usage",    tcu::IVec3(1,1,1)));
2432         basicComputeTests->addChild(new SSBOBarrierTest(testCtx,        "ssbo_cmd_barrier_multiple",    "SSBO memory barrier usage",    tcu::IVec3(11,5,7)));
2433
2434         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_single_invocation",         "Basic shared variable usage",  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2435         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_single_group",                      "Basic shared variable usage",  tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
2436         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_multiple_invocations",      "Basic shared variable usage",  tcu::IVec3(1,1,1),      tcu::IVec3(2,5,4)));
2437         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_multiple_groups",           "Basic shared variable usage",  tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
2438
2439         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_single_invocation",           "Atomic operation with shared var",             tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2440         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_single_group",                        "Atomic operation with shared var",             tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
2441         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_multiple_invocations",        "Atomic operation with shared var",             tcu::IVec3(1,1,1),      tcu::IVec3(2,5,4)));
2442         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_multiple_groups",                     "Atomic operation with shared var",             tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
2443
2444         basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx,    "copy_image_to_ssbo_small",     "Image to SSBO copy",   tcu::IVec2(1,1),        tcu::IVec2(64,64)));
2445         basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx,    "copy_image_to_ssbo_large",     "Image to SSBO copy",   tcu::IVec2(2,4),        tcu::IVec2(512,512)));
2446
2447         basicComputeTests->addChild(new CopySSBOToImageTest(testCtx,    "copy_ssbo_to_image_small",     "SSBO to image copy",   tcu::IVec2(1, 1),       tcu::IVec2(64, 64)));
2448         basicComputeTests->addChild(new CopySSBOToImageTest(testCtx,    "copy_ssbo_to_image_large",     "SSBO to image copy",   tcu::IVec2(2, 4),       tcu::IVec2(512, 512)));
2449
2450         basicComputeTests->addChild(new ImageAtomicOpTest(testCtx,      "image_atomic_op_local_size_1", "Atomic operation with image",  1,      tcu::IVec2(64,64)));
2451         basicComputeTests->addChild(new ImageAtomicOpTest(testCtx,      "image_atomic_op_local_size_8", "Atomic operation with image",  8,      tcu::IVec2(64,64)));
2452
2453         basicComputeTests->addChild(new ImageBarrierTest(testCtx,       "image_barrier_single",         "Image barrier",        tcu::IVec2(1,1)));
2454         basicComputeTests->addChild(new ImageBarrierTest(testCtx,       "image_barrier_multiple",       "Image barrier",        tcu::IVec2(64,64)));
2455
2456         return basicComputeTests.release();
2457 }
2458
2459 } // compute
2460 } // vkt