Merge "Fix error mask generation in checkLineContinuity" into nougat-cts-dev am:...
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / compute / vktComputeBasicComputeShaderTests.cpp
/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 * Copyright (c) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Compute Shader Tests
 *//*--------------------------------------------------------------------*/
24
#include "vktComputeBasicComputeShaderTests.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktComputeTestsUtil.hpp"

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkRefUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkTypeUtil.hpp"

#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"
#include "deRandom.hpp"

#include <sstream>
#include <string>
#include <vector>
46
47 using namespace vk;
48
49 namespace vkt
50 {
51 namespace compute
52 {
53 namespace
54 {
55
56 template<typename T, int size>
57 T multiplyComponents (const tcu::Vector<T, size>& v)
58 {
59         T accum = 1;
60         for (int i = 0; i < size; ++i)
61                 accum *= v[i];
62         return accum;
63 }
64
// Returns `a` multiplied by itself, using T's own operator*.
template<typename T>
inline T squared (const T& a)
{
	return a * a;
}
70
71 inline VkImageCreateInfo make2DImageCreateInfo (const tcu::IVec2& imageSize, const VkImageUsageFlags usage)
72 {
73         const VkImageCreateInfo imageParams =
74         {
75                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                            // VkStructureType                      sType;
76                 DE_NULL,                                                                                        // const void*                          pNext;
77                 0u,                                                                                                     // VkImageCreateFlags           flags;
78                 VK_IMAGE_TYPE_2D,                                                                       // VkImageType                          imageType;
79                 VK_FORMAT_R32_UINT,                                                                     // VkFormat                                     format;
80                 vk::makeExtent3D(imageSize.x(), imageSize.y(), 1),      // VkExtent3D                           extent;
81                 1u,                                                                                                     // deUint32                                     mipLevels;
82                 1u,                                                                                                     // deUint32                                     arrayLayers;
83                 VK_SAMPLE_COUNT_1_BIT,                                                          // VkSampleCountFlagBits        samples;
84                 VK_IMAGE_TILING_OPTIMAL,                                                        // VkImageTiling                        tiling;
85                 usage,                                                                                          // VkImageUsageFlags            usage;
86                 VK_SHARING_MODE_EXCLUSIVE,                                                      // VkSharingMode                        sharingMode;
87                 0u,                                                                                                     // deUint32                                     queueFamilyIndexCount;
88                 DE_NULL,                                                                                        // const deUint32*                      pQueueFamilyIndices;
89                 VK_IMAGE_LAYOUT_UNDEFINED,                                                      // VkImageLayout                        initialLayout;
90         };
91         return imageParams;
92 }
93
94 inline VkBufferImageCopy makeBufferImageCopy(const tcu::IVec2& imageSize)
95 {
96         return compute::makeBufferImageCopy(vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), 1u);
97 }
98
// Selects between the two buffer kinds named by the enumerators.
// NOTE(review): no user of this enum is visible in this part of the file;
// presumably it picks a uniform buffer or a storage buffer (SSBO) for a
// test's input - confirm at the use site.
enum BufferType
{
	BUFFER_TYPE_UNIFORM,
	BUFFER_TYPE_SSBO,
};
104
105 class SharedVarTest : public vkt::TestCase
106 {
107 public:
108                                                 SharedVarTest   (tcu::TestContext&              testCtx,
109                                                                                  const std::string&             name,
110                                                                                  const std::string&             description,
111                                                                                  const tcu::IVec3&              localSize,
112                                                                                  const tcu::IVec3&              workSize);
113
114         void                            initPrograms    (SourceCollections&             sourceCollections) const;
115         TestInstance*           createInstance  (Context&                               context) const;
116
117 private:
118         const tcu::IVec3        m_localSize;
119         const tcu::IVec3        m_workSize;
120 };
121
122 class SharedVarTestInstance : public vkt::TestInstance
123 {
124 public:
125                                                                         SharedVarTestInstance   (Context&                       context,
126                                                                                                                          const tcu::IVec3&      localSize,
127                                                                                                                          const tcu::IVec3&      workSize);
128
129         tcu::TestStatus                                 iterate                                 (void);
130
131 private:
132         const tcu::IVec3                                m_localSize;
133         const tcu::IVec3                                m_workSize;
134 };
135
136 SharedVarTest::SharedVarTest (tcu::TestContext&         testCtx,
137                                                           const std::string&    name,
138                                                           const std::string&    description,
139                                                           const tcu::IVec3&             localSize,
140                                                           const tcu::IVec3&             workSize)
141         : TestCase              (testCtx, name, description)
142         , m_localSize   (localSize)
143         , m_workSize    (workSize)
144 {
145 }
146
147 void SharedVarTest::initPrograms (SourceCollections& sourceCollections) const
148 {
149         const int workGroupSize = multiplyComponents(m_localSize);
150         const int workGroupCount = multiplyComponents(m_workSize);
151         const int numValues = workGroupSize * workGroupCount;
152
153         std::ostringstream src;
154         src << "#version 310 es\n"
155                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
156                 << "layout(binding = 0) writeonly buffer Output {\n"
157                 << "    uint values[" << numValues << "];\n"
158                 << "} sb_out;\n\n"
159                 << "shared uint offsets[" << workGroupSize << "];\n\n"
160                 << "void main (void) {\n"
161                 << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
162                 << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
163                 << "    uint globalOffs = localSize*globalNdx;\n"
164                 << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
165                 << "\n"
166                 << "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
167                 << "    memoryBarrierShared();\n"
168                 << "    barrier();\n"
169                 << "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
170                 << "}\n";
171
172         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
173 }
174
175 TestInstance* SharedVarTest::createInstance (Context& context) const
176 {
177         return new SharedVarTestInstance(context, m_localSize, m_workSize);
178 }
179
180 SharedVarTestInstance::SharedVarTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
181         : TestInstance  (context)
182         , m_localSize   (localSize)
183         , m_workSize    (workSize)
184 {
185 }
186
187 tcu::TestStatus SharedVarTestInstance::iterate (void)
188 {
189         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
190         const VkDevice                  device                          = m_context.getDevice();
191         const VkQueue                   queue                           = m_context.getUniversalQueue();
192         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
193         Allocator&                              allocator                       = m_context.getDefaultAllocator();
194
195         const int workGroupSize = multiplyComponents(m_localSize);
196         const int workGroupCount = multiplyComponents(m_workSize);
197
198         // Create a buffer and host-visible memory for it
199
200         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
201         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
202
203         // Create descriptor set
204
205         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
206                 DescriptorSetLayoutBuilder()
207                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
208                 .build(vk, device));
209
210         const Unique<VkDescriptorPool> descriptorPool(
211                 DescriptorPoolBuilder()
212                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
213                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
214
215         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
216
217         const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
218         DescriptorSetUpdateBuilder()
219                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
220                 .update(vk, device);
221
222         // Perform the computation
223
224         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
225         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
226         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
227
228         const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
229
230         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
231         const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
232
233         // Start recording commands
234
235         beginCommandBuffer(vk, *cmdBuffer);
236
237         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
238         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
239
240         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
241
242         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
243
244         endCommandBuffer(vk, *cmdBuffer);
245
246         // Wait for completion
247
248         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
249
250         // Validate the results
251
252         const Allocation& bufferAllocation = buffer.getAllocation();
253         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
254
255         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
256
257         for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
258         {
259                 const int globalOffset = groupNdx * workGroupSize;
260                 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
261                 {
262                         const deUint32 res = bufferPtr[globalOffset + localOffset];
263                         const deUint32 ref = globalOffset + squared(workGroupSize - localOffset - 1);
264
265                         if (res != ref)
266                         {
267                                 std::ostringstream msg;
268                                 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
269                                 return tcu::TestStatus::fail(msg.str());
270                         }
271                 }
272         }
273         return tcu::TestStatus::pass("Compute succeeded");
274 }
275
276 class SharedVarAtomicOpTest : public vkt::TestCase
277 {
278 public:
279                                                 SharedVarAtomicOpTest   (tcu::TestContext&      testCtx,
280                                                                                                  const std::string&     name,
281                                                                                                  const std::string&     description,
282                                                                                                  const tcu::IVec3&      localSize,
283                                                                                                  const tcu::IVec3&      workSize);
284
285         void                            initPrograms                    (SourceCollections& sourceCollections) const;
286         TestInstance*           createInstance                  (Context&                       context) const;
287
288 private:
289         const tcu::IVec3        m_localSize;
290         const tcu::IVec3        m_workSize;
291 };
292
293 class SharedVarAtomicOpTestInstance : public vkt::TestInstance
294 {
295 public:
296                                                                         SharedVarAtomicOpTestInstance   (Context&                       context,
297                                                                                                                                          const tcu::IVec3&      localSize,
298                                                                                                                                          const tcu::IVec3&      workSize);
299
300         tcu::TestStatus                                 iterate                                                 (void);
301
302 private:
303         const tcu::IVec3                                m_localSize;
304         const tcu::IVec3                                m_workSize;
305 };
306
307 SharedVarAtomicOpTest::SharedVarAtomicOpTest (tcu::TestContext&         testCtx,
308                                                                                           const std::string&    name,
309                                                                                           const std::string&    description,
310                                                                                           const tcu::IVec3&             localSize,
311                                                                                           const tcu::IVec3&             workSize)
312         : TestCase              (testCtx, name, description)
313         , m_localSize   (localSize)
314         , m_workSize    (workSize)
315 {
316 }
317
318 void SharedVarAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
319 {
320         const int workGroupSize = multiplyComponents(m_localSize);
321         const int workGroupCount = multiplyComponents(m_workSize);
322         const int numValues = workGroupSize * workGroupCount;
323
324         std::ostringstream src;
325         src << "#version 310 es\n"
326                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
327                 << "layout(binding = 0) writeonly buffer Output {\n"
328                 << "    uint values[" << numValues << "];\n"
329                 << "} sb_out;\n\n"
330                 << "shared uint count;\n\n"
331                 << "void main (void) {\n"
332                 << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
333                 << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
334                 << "    uint globalOffs = localSize*globalNdx;\n"
335                 << "\n"
336                 << "    count = 0u;\n"
337                 << "    memoryBarrierShared();\n"
338                 << "    barrier();\n"
339                 << "    uint oldVal = atomicAdd(count, 1u);\n"
340                 << "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
341                 << "}\n";
342
343         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
344 }
345
346 TestInstance* SharedVarAtomicOpTest::createInstance (Context& context) const
347 {
348         return new SharedVarAtomicOpTestInstance(context, m_localSize, m_workSize);
349 }
350
351 SharedVarAtomicOpTestInstance::SharedVarAtomicOpTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
352         : TestInstance  (context)
353         , m_localSize   (localSize)
354         , m_workSize    (workSize)
355 {
356 }
357
358 tcu::TestStatus SharedVarAtomicOpTestInstance::iterate (void)
359 {
360         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
361         const VkDevice                  device                          = m_context.getDevice();
362         const VkQueue                   queue                           = m_context.getUniversalQueue();
363         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
364         Allocator&                              allocator                       = m_context.getDefaultAllocator();
365
366         const int workGroupSize = multiplyComponents(m_localSize);
367         const int workGroupCount = multiplyComponents(m_workSize);
368
369         // Create a buffer and host-visible memory for it
370
371         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
372         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
373
374         // Create descriptor set
375
376         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
377                 DescriptorSetLayoutBuilder()
378                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
379                 .build(vk, device));
380
381         const Unique<VkDescriptorPool> descriptorPool(
382                 DescriptorPoolBuilder()
383                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
384                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
385
386         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
387
388         const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
389         DescriptorSetUpdateBuilder()
390                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
391                 .update(vk, device);
392
393         // Perform the computation
394
395         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
396         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
397         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
398
399         const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
400
401         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
402         const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
403
404         // Start recording commands
405
406         beginCommandBuffer(vk, *cmdBuffer);
407
408         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
409         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
410
411         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
412
413         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1u, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
414
415         endCommandBuffer(vk, *cmdBuffer);
416
417         // Wait for completion
418
419         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
420
421         // Validate the results
422
423         const Allocation& bufferAllocation = buffer.getAllocation();
424         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
425
426         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
427
428         for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
429         {
430                 const int globalOffset = groupNdx * workGroupSize;
431                 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
432                 {
433                         const deUint32 res = bufferPtr[globalOffset + localOffset];
434                         const deUint32 ref = localOffset + 1;
435
436                         if (res != ref)
437                         {
438                                 std::ostringstream msg;
439                                 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
440                                 return tcu::TestStatus::fail(msg.str());
441                         }
442                 }
443         }
444         return tcu::TestStatus::pass("Compute succeeded");
445 }
446
447 class SSBOLocalBarrierTest : public vkt::TestCase
448 {
449 public:
450                                                 SSBOLocalBarrierTest    (tcu::TestContext&      testCtx,
451                                                                                                  const std::string& name,
452                                                                                                  const std::string&     description,
453                                                                                                  const tcu::IVec3&      localSize,
454                                                                                                  const tcu::IVec3&      workSize);
455
456         void                            initPrograms                    (SourceCollections& sourceCollections) const;
457         TestInstance*           createInstance                  (Context&                       context) const;
458
459 private:
460         const tcu::IVec3        m_localSize;
461         const tcu::IVec3        m_workSize;
462 };
463
464 class SSBOLocalBarrierTestInstance : public vkt::TestInstance
465 {
466 public:
467                                                                         SSBOLocalBarrierTestInstance    (Context&                       context,
468                                                                                                                                          const tcu::IVec3&      localSize,
469                                                                                                                                          const tcu::IVec3&      workSize);
470
471         tcu::TestStatus                                 iterate                                                 (void);
472
473 private:
474         const tcu::IVec3                                m_localSize;
475         const tcu::IVec3                                m_workSize;
476 };
477
478 SSBOLocalBarrierTest::SSBOLocalBarrierTest (tcu::TestContext&   testCtx,
479                                                                                         const std::string&      name,
480                                                                                         const std::string&      description,
481                                                                                         const tcu::IVec3&       localSize,
482                                                                                         const tcu::IVec3&       workSize)
483         : TestCase              (testCtx, name, description)
484         , m_localSize   (localSize)
485         , m_workSize    (workSize)
486 {
487 }
488
489 void SSBOLocalBarrierTest::initPrograms (SourceCollections& sourceCollections) const
490 {
491         const int workGroupSize = multiplyComponents(m_localSize);
492         const int workGroupCount = multiplyComponents(m_workSize);
493         const int numValues = workGroupSize * workGroupCount;
494
495         std::ostringstream src;
496         src << "#version 310 es\n"
497                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
498                 << "layout(binding = 0) coherent buffer Output {\n"
499                 << "    uint values[" << numValues << "];\n"
500                 << "} sb_out;\n\n"
501                 << "void main (void) {\n"
502                 << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
503                 << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
504                 << "    uint globalOffs = localSize*globalNdx;\n"
505                 << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
506                 << "\n"
507                 << "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
508                 << "    memoryBarrierBuffer();\n"
509                 << "    barrier();\n"
510                 << "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n"         // += so we read and write
511                 << "    memoryBarrierBuffer();\n"
512                 << "    barrier();\n"
513                 << "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
514                 << "}\n";
515
516         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
517 }
518
519 TestInstance* SSBOLocalBarrierTest::createInstance (Context& context) const
520 {
521         return new SSBOLocalBarrierTestInstance(context, m_localSize, m_workSize);
522 }
523
524 SSBOLocalBarrierTestInstance::SSBOLocalBarrierTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
525         : TestInstance  (context)
526         , m_localSize   (localSize)
527         , m_workSize    (workSize)
528 {
529 }
530
531 tcu::TestStatus SSBOLocalBarrierTestInstance::iterate (void)
532 {
533         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
534         const VkDevice                  device                          = m_context.getDevice();
535         const VkQueue                   queue                           = m_context.getUniversalQueue();
536         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
537         Allocator&                              allocator                       = m_context.getDefaultAllocator();
538
539         const int workGroupSize = multiplyComponents(m_localSize);
540         const int workGroupCount = multiplyComponents(m_workSize);
541
542         // Create a buffer and host-visible memory for it
543
544         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
545         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
546
547         // Create descriptor set
548
549         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
550                 DescriptorSetLayoutBuilder()
551                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
552                 .build(vk, device));
553
554         const Unique<VkDescriptorPool> descriptorPool(
555                 DescriptorPoolBuilder()
556                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
557                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
558
559         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
560
561         const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
562         DescriptorSetUpdateBuilder()
563                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
564                 .update(vk, device);
565
566         // Perform the computation
567
568         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
569         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
570         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
571
572         const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
573
574         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
575         const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
576
577         // Start recording commands
578
579         beginCommandBuffer(vk, *cmdBuffer);
580
581         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
582         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
583
584         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
585
586         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
587
588         endCommandBuffer(vk, *cmdBuffer);
589
590         // Wait for completion
591
592         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
593
594         // Validate the results
595
596         const Allocation& bufferAllocation = buffer.getAllocation();
597         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
598
599         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
600
601         for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
602         {
603                 const int globalOffset = groupNdx * workGroupSize;
604                 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
605                 {
606                         const deUint32  res             = bufferPtr[globalOffset + localOffset];
607                         const int               offs0   = localOffset - 1 < 0 ? ((localOffset + workGroupSize - 1) % workGroupSize) : ((localOffset - 1) % workGroupSize);
608                         const int               offs1   = localOffset - 2 < 0 ? ((localOffset + workGroupSize - 2) % workGroupSize) : ((localOffset - 2) % workGroupSize);
609                         const deUint32  ref             = static_cast<deUint32>(globalOffset + offs0 + offs1);
610
611                         if (res != ref)
612                         {
613                                 std::ostringstream msg;
614                                 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
615                                 return tcu::TestStatus::fail(msg.str());
616                         }
617                 }
618         }
619         return tcu::TestStatus::pass("Compute succeeded");
620 }
621
622 class CopyImageToSSBOTest : public vkt::TestCase
623 {
624 public:
625                                                 CopyImageToSSBOTest             (tcu::TestContext&      testCtx,
626                                                                                                  const std::string&     name,
627                                                                                                  const std::string&     description,
628                                                                                                  const tcu::IVec2&      localSize,
629                                                                                                  const tcu::IVec2&      imageSize);
630
631         void                            initPrograms                    (SourceCollections& sourceCollections) const;
632         TestInstance*           createInstance                  (Context&                       context) const;
633
634 private:
635         const tcu::IVec2        m_localSize;
636         const tcu::IVec2        m_imageSize;
637 };
638
639 class CopyImageToSSBOTestInstance : public vkt::TestInstance
640 {
641 public:
642                                                                         CopyImageToSSBOTestInstance             (Context&                       context,
643                                                                                                                                          const tcu::IVec2&      localSize,
644                                                                                                                                          const tcu::IVec2&      imageSize);
645
646         tcu::TestStatus                                 iterate                                                 (void);
647
648 private:
649         const tcu::IVec2                                m_localSize;
650         const tcu::IVec2                                m_imageSize;
651 };
652
653 CopyImageToSSBOTest::CopyImageToSSBOTest (tcu::TestContext&             testCtx,
654                                                                                   const std::string&    name,
655                                                                                   const std::string&    description,
656                                                                                   const tcu::IVec2&             localSize,
657                                                                                   const tcu::IVec2&             imageSize)
658         : TestCase              (testCtx, name, description)
659         , m_localSize   (localSize)
660         , m_imageSize   (imageSize)
661 {
662         DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
663         DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
664 }
665
666 void CopyImageToSSBOTest::initPrograms (SourceCollections& sourceCollections) const
667 {
668         std::ostringstream src;
669         src << "#version 310 es\n"
670                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
671                 << "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_srcImg;\n"
672                 << "layout(binding = 0) writeonly buffer Output {\n"
673                 << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
674                 << "} sb_out;\n\n"
675                 << "void main (void) {\n"
676                 << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
677                 << "    uint value  = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
678                 << "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
679                 << "}\n";
680
681         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
682 }
683
684 TestInstance* CopyImageToSSBOTest::createInstance (Context& context) const
685 {
686         return new CopyImageToSSBOTestInstance(context, m_localSize, m_imageSize);
687 }
688
689 CopyImageToSSBOTestInstance::CopyImageToSSBOTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
690         : TestInstance  (context)
691         , m_localSize   (localSize)
692         , m_imageSize   (imageSize)
693 {
694 }
695
696 tcu::TestStatus CopyImageToSSBOTestInstance::iterate (void)
697 {
698         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
699         const VkDevice                  device                          = m_context.getDevice();
700         const VkQueue                   queue                           = m_context.getUniversalQueue();
701         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
702         Allocator&                              allocator                       = m_context.getDefaultAllocator();
703
704         // Create an image
705
706         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
707         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
708
709         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
710         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
711
712         // Staging buffer (source data for image)
713
714         const deUint32 imageArea = multiplyComponents(m_imageSize);
715         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
716
717         const Buffer stagingBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
718
719         // Populate the staging buffer with test data
720         {
721                 de::Random rnd(0xab2c7);
722                 const Allocation& stagingBufferAllocation = stagingBuffer.getAllocation();
723                 deUint32* bufferPtr = static_cast<deUint32*>(stagingBufferAllocation.getHostPtr());
724                 for (deUint32 i = 0; i < imageArea; ++i)
725                         *bufferPtr++ = rnd.getUint32();
726
727                 flushMappedMemoryRange(vk, device, stagingBufferAllocation.getMemory(), stagingBufferAllocation.getOffset(), bufferSizeBytes);
728         }
729
730         // Create a buffer to store shader output
731
732         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
733
734         // Create descriptor set
735
736         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
737                 DescriptorSetLayoutBuilder()
738                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
739                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
740                 .build(vk, device));
741
742         const Unique<VkDescriptorPool> descriptorPool(
743                 DescriptorPoolBuilder()
744                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
745                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
746                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
747
748         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
749
750         // Set the bindings
751
752         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
753         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
754
755         DescriptorSetUpdateBuilder()
756                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
757                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
758                 .update(vk, device);
759
760         // Perform the computation
761         {
762                 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
763                 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
764                 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
765
766                 const VkBufferMemoryBarrier stagingBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *stagingBuffer, 0ull, bufferSizeBytes);
767
768                 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
769                         0u, VK_ACCESS_TRANSFER_WRITE_BIT,
770                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
771                         *image, subresourceRange);
772
773                 const VkImageMemoryBarrier imagePostCopyBarrier = makeImageMemoryBarrier(
774                         VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
775                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
776                         *image, subresourceRange);
777
778                 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
779
780                 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
781                 const tcu::IVec2 workSize = m_imageSize / m_localSize;
782
783                 // Prepare the command buffer
784
785                 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
786                 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
787
788                 // Start recording commands
789
790                 beginCommandBuffer(vk, *cmdBuffer);
791
792                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
793                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
794
795                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &stagingBufferPostHostWriteBarrier, 1, &imagePreCopyBarrier);
796                 vk.cmdCopyBufferToImage(*cmdBuffer, *stagingBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &copyParams);
797                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePostCopyBarrier);
798
799                 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
800                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
801
802                 endCommandBuffer(vk, *cmdBuffer);
803
804                 // Wait for completion
805
806                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
807         }
808
809         // Validate the results
810
811         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
812         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
813
814         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
815         const deUint32* refBufferPtr = static_cast<deUint32*>(stagingBuffer.getAllocation().getHostPtr());
816
817         for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
818         {
819                 const deUint32 res = *(bufferPtr + ndx);
820                 const deUint32 ref = *(refBufferPtr + ndx);
821
822                 if (res != ref)
823                 {
824                         std::ostringstream msg;
825                         msg << "Comparison failed for Output.values[" << ndx << "]";
826                         return tcu::TestStatus::fail(msg.str());
827                 }
828         }
829         return tcu::TestStatus::pass("Compute succeeded");
830 }
831
832 class CopySSBOToImageTest : public vkt::TestCase
833 {
834 public:
835                                                 CopySSBOToImageTest     (tcu::TestContext&      testCtx,
836                                                                                          const std::string&     name,
837                                                                                          const std::string&     description,
838                                                                                          const tcu::IVec2&      localSize,
839                                                                                          const tcu::IVec2&      imageSize);
840
841         void                            initPrograms            (SourceCollections& sourceCollections) const;
842         TestInstance*           createInstance          (Context&                       context) const;
843
844 private:
845         const tcu::IVec2        m_localSize;
846         const tcu::IVec2        m_imageSize;
847 };
848
849 class CopySSBOToImageTestInstance : public vkt::TestInstance
850 {
851 public:
852                                                                         CopySSBOToImageTestInstance     (Context&                       context,
853                                                                                                                                  const tcu::IVec2&      localSize,
854                                                                                                                                  const tcu::IVec2&      imageSize);
855
856         tcu::TestStatus                                 iterate                                         (void);
857
858 private:
859         const tcu::IVec2                                m_localSize;
860         const tcu::IVec2                                m_imageSize;
861 };
862
863 CopySSBOToImageTest::CopySSBOToImageTest (tcu::TestContext&             testCtx,
864                                                                                   const std::string&    name,
865                                                                                   const std::string&    description,
866                                                                                   const tcu::IVec2&             localSize,
867                                                                                   const tcu::IVec2&             imageSize)
868         : TestCase              (testCtx, name, description)
869         , m_localSize   (localSize)
870         , m_imageSize   (imageSize)
871 {
872         DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
873         DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
874 }
875
876 void CopySSBOToImageTest::initPrograms (SourceCollections& sourceCollections) const
877 {
878         std::ostringstream src;
879         src << "#version 310 es\n"
880                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
881                 << "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_dstImg;\n"
882                 << "layout(binding = 0) readonly buffer Input {\n"
883                 << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
884                 << "} sb_in;\n\n"
885                 << "void main (void) {\n"
886                 << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
887                 << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
888                 << "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
889                 << "}\n";
890
891         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
892 }
893
894 TestInstance* CopySSBOToImageTest::createInstance (Context& context) const
895 {
896         return new CopySSBOToImageTestInstance(context, m_localSize, m_imageSize);
897 }
898
899 CopySSBOToImageTestInstance::CopySSBOToImageTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
900         : TestInstance  (context)
901         , m_localSize   (localSize)
902         , m_imageSize   (imageSize)
903 {
904 }
905
906 tcu::TestStatus CopySSBOToImageTestInstance::iterate (void)
907 {
908         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
909         const VkDevice                  device                          = m_context.getDevice();
910         const VkQueue                   queue                           = m_context.getUniversalQueue();
911         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
912         Allocator&                              allocator                       = m_context.getDefaultAllocator();
913
914         // Create an image
915
916         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
917         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
918
919         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
920         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
921
922         // Create an input buffer (data to be read in the shader)
923
924         const deUint32 imageArea = multiplyComponents(m_imageSize);
925         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
926
927         const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
928
929         // Populate the buffer with test data
930         {
931                 de::Random rnd(0x77238ac2);
932                 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
933                 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
934                 for (deUint32 i = 0; i < imageArea; ++i)
935                         *bufferPtr++ = rnd.getUint32();
936
937                 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
938         }
939
940         // Create a buffer to store shader output (copied from image data)
941
942         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
943
944         // Create descriptor set
945
946         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
947                 DescriptorSetLayoutBuilder()
948                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
949                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
950                 .build(vk, device));
951
952         const Unique<VkDescriptorPool> descriptorPool(
953                 DescriptorPoolBuilder()
954                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
955                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
956                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
957
958         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
959
960         // Set the bindings
961
962         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
963         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
964
965         DescriptorSetUpdateBuilder()
966                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
967                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
968                 .update(vk, device);
969
970         // Perform the computation
971         {
972                 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
973                 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
974                 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
975
976                 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
977
978                 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
979                         0u, 0u,
980                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
981                         *image, subresourceRange);
982
983                 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
984                         VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
985                         VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
986                         *image, subresourceRange);
987
988                 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
989
990                 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
991                 const tcu::IVec2 workSize = m_imageSize / m_localSize;
992
993                 // Prepare the command buffer
994
995                 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
996                 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
997
998                 // Start recording commands
999
1000                 beginCommandBuffer(vk, *cmdBuffer);
1001
1002                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1003                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1004
1005                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
1006                 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
1007
1008                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
1009                 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &copyParams);
1010                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1011
1012                 endCommandBuffer(vk, *cmdBuffer);
1013
1014                 // Wait for completion
1015
1016                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1017         }
1018
1019         // Validate the results
1020
1021         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1022         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1023
1024         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1025         const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
1026
1027         for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
1028         {
1029                 const deUint32 res = *(bufferPtr + ndx);
1030                 const deUint32 ref = *(refBufferPtr + ndx);
1031
1032                 if (res != ref)
1033                 {
1034                         std::ostringstream msg;
1035                         msg << "Comparison failed for pixel " << ndx;
1036                         return tcu::TestStatus::fail(msg.str());
1037                 }
1038         }
1039         return tcu::TestStatus::pass("Compute succeeded");
1040 }
1041
1042 class BufferToBufferInvertTest : public vkt::TestCase
1043 {
1044 public:
1045         void                                                            initPrograms                            (SourceCollections&     sourceCollections) const;
1046         TestInstance*                                           createInstance                          (Context&                       context) const;
1047
1048         static BufferToBufferInvertTest*        UBOToSSBOInvertCase                     (tcu::TestContext&      testCtx,
1049                                                                                                                                          const std::string& name,
1050                                                                                                                                          const std::string& description,
1051                                                                                                                                          const deUint32         numValues,
1052                                                                                                                                          const tcu::IVec3&      localSize,
1053                                                                                                                                          const tcu::IVec3&      workSize);
1054
1055         static BufferToBufferInvertTest*        CopyInvertSSBOCase                      (tcu::TestContext&      testCtx,
1056                                                                                                                                          const std::string& name,
1057                                                                                                                                          const std::string& description,
1058                                                                                                                                          const deUint32         numValues,
1059                                                                                                                                          const tcu::IVec3&      localSize,
1060                                                                                                                                          const tcu::IVec3&      workSize);
1061
1062 private:
1063                                                                                 BufferToBufferInvertTest        (tcu::TestContext&      testCtx,
1064                                                                                                                                          const std::string& name,
1065                                                                                                                                          const std::string& description,
1066                                                                                                                                          const deUint32         numValues,
1067                                                                                                                                          const tcu::IVec3&      localSize,
1068                                                                                                                                          const tcu::IVec3&      workSize,
1069                                                                                                                                          const BufferType       bufferType);
1070
1071         const BufferType                                        m_bufferType;
1072         const deUint32                                          m_numValues;
1073         const tcu::IVec3                                        m_localSize;
1074         const tcu::IVec3                                        m_workSize;
1075 };
1076
1077 class BufferToBufferInvertTestInstance : public vkt::TestInstance
1078 {
1079 public:
1080                                                                         BufferToBufferInvertTestInstance        (Context&                       context,
1081                                                                                                                                                  const deUint32         numValues,
1082                                                                                                                                                  const tcu::IVec3&      localSize,
1083                                                                                                                                                  const tcu::IVec3&      workSize,
1084                                                                                                                                                  const BufferType       bufferType);
1085
1086         tcu::TestStatus                                 iterate                                                         (void);
1087
1088 private:
1089         const BufferType                                m_bufferType;
1090         const deUint32                                  m_numValues;
1091         const tcu::IVec3                                m_localSize;
1092         const tcu::IVec3                                m_workSize;
1093 };
1094
1095 BufferToBufferInvertTest::BufferToBufferInvertTest (tcu::TestContext&   testCtx,
1096                                                                                                         const std::string&      name,
1097                                                                                                         const std::string&      description,
1098                                                                                                         const deUint32          numValues,
1099                                                                                                         const tcu::IVec3&       localSize,
1100                                                                                                         const tcu::IVec3&       workSize,
1101                                                                                                         const BufferType        bufferType)
1102         : TestCase              (testCtx, name, description)
1103         , m_bufferType  (bufferType)
1104         , m_numValues   (numValues)
1105         , m_localSize   (localSize)
1106         , m_workSize    (workSize)
1107 {
1108         DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1109         DE_ASSERT(m_bufferType == BUFFER_TYPE_UNIFORM || m_bufferType == BUFFER_TYPE_SSBO);
1110 }
1111
1112 BufferToBufferInvertTest* BufferToBufferInvertTest::UBOToSSBOInvertCase (tcu::TestContext&      testCtx,
1113                                                                                                                                                  const std::string&     name,
1114                                                                                                                                                  const std::string&     description,
1115                                                                                                                                                  const deUint32         numValues,
1116                                                                                                                                                  const tcu::IVec3&      localSize,
1117                                                                                                                                                  const tcu::IVec3&      workSize)
1118 {
1119         return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_UNIFORM);
1120 }
1121
1122 BufferToBufferInvertTest* BufferToBufferInvertTest::CopyInvertSSBOCase (tcu::TestContext&       testCtx,
1123                                                                                                                                                 const std::string&      name,
1124                                                                                                                                                 const std::string&      description,
1125                                                                                                                                                 const deUint32          numValues,
1126                                                                                                                                                 const tcu::IVec3&       localSize,
1127                                                                                                                                                 const tcu::IVec3&       workSize)
1128 {
1129         return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_SSBO);
1130 }
1131
1132 void BufferToBufferInvertTest::initPrograms (SourceCollections& sourceCollections) const
1133 {
1134         std::ostringstream src;
1135         if (m_bufferType == BUFFER_TYPE_UNIFORM)
1136         {
1137                 src << "#version 310 es\n"
1138                         << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1139                         << "layout(binding = 0) readonly uniform Input {\n"
1140                         << "    uint values[" << m_numValues << "];\n"
1141                         << "} ub_in;\n"
1142                         << "layout(binding = 1, std140) writeonly buffer Output {\n"
1143                         << "    uint values[" << m_numValues << "];\n"
1144                         << "} sb_out;\n"
1145                         << "void main (void) {\n"
1146                         << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1147                         << "    uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
1148                         << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1149                         << "    uint offset          = numValuesPerInv*groupNdx;\n"
1150                         << "\n"
1151                         << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1152                         << "        sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
1153                         << "}\n";
1154         }
1155         else if (m_bufferType == BUFFER_TYPE_SSBO)
1156         {
1157                 src << "#version 310 es\n"
1158                         << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1159                         << "layout(binding = 0, std140) readonly buffer Input {\n"
1160                         << "    uint values[" << m_numValues << "];\n"
1161                         << "} sb_in;\n"
1162                         << "layout (binding = 1, std140) writeonly buffer Output {\n"
1163                         << "    uint values[" << m_numValues << "];\n"
1164                         << "} sb_out;\n"
1165                         << "void main (void) {\n"
1166                         << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1167                         << "    uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
1168                         << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1169                         << "    uint offset          = numValuesPerInv*groupNdx;\n"
1170                         << "\n"
1171                         << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1172                         << "        sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
1173                         << "}\n";
1174         }
1175
1176         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1177 }
1178
1179 TestInstance* BufferToBufferInvertTest::createInstance (Context& context) const
1180 {
1181         return new BufferToBufferInvertTestInstance(context, m_numValues, m_localSize, m_workSize, m_bufferType);
1182 }
1183
1184 BufferToBufferInvertTestInstance::BufferToBufferInvertTestInstance (Context&                    context,
1185                                                                                                                                         const deUint32          numValues,
1186                                                                                                                                         const tcu::IVec3&       localSize,
1187                                                                                                                                         const tcu::IVec3&       workSize,
1188                                                                                                                                         const BufferType        bufferType)
1189         : TestInstance  (context)
1190         , m_bufferType  (bufferType)
1191         , m_numValues   (numValues)
1192         , m_localSize   (localSize)
1193         , m_workSize    (workSize)
1194 {
1195 }
1196
1197 tcu::TestStatus BufferToBufferInvertTestInstance::iterate (void)
1198 {
1199         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1200         const VkDevice                  device                          = m_context.getDevice();
1201         const VkQueue                   queue                           = m_context.getUniversalQueue();
1202         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1203         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1204
1205         // Customize the test based on buffer type
1206
1207         const VkBufferUsageFlags inputBufferUsageFlags          = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1208         const VkDescriptorType inputBufferDescriptorType        = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
1209         const deUint32 randomSeed                                                       = (m_bufferType == BUFFER_TYPE_UNIFORM ? 0x111223f : 0x124fef);
1210
1211         // Create an input buffer
1212
1213         const VkDeviceSize bufferSizeBytes = sizeof(tcu::UVec4) * m_numValues;
1214         const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, inputBufferUsageFlags), MemoryRequirement::HostVisible);
1215
1216         // Fill the input buffer with data
1217         {
1218                 de::Random rnd(randomSeed);
1219                 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
1220                 tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(inputBufferAllocation.getHostPtr());
1221                 for (deUint32 i = 0; i < m_numValues; ++i)
1222                         bufferPtr[i].x() = rnd.getUint32();
1223
1224                 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
1225         }
1226
1227         // Create an output buffer
1228
1229         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1230
1231         // Create descriptor set
1232
1233         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1234                 DescriptorSetLayoutBuilder()
1235                 .addSingleBinding(inputBufferDescriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1236                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1237                 .build(vk, device));
1238
1239         const Unique<VkDescriptorPool> descriptorPool(
1240                 DescriptorPoolBuilder()
1241                 .addType(inputBufferDescriptorType)
1242                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1243                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1244
1245         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1246
1247         const VkDescriptorBufferInfo inputBufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
1248         const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
1249         DescriptorSetUpdateBuilder()
1250                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), inputBufferDescriptorType, &inputBufferDescriptorInfo)
1251                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1252                 .update(vk, device);
1253
1254         // Perform the computation
1255
1256         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1257         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1258         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1259
1260         const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
1261
1262         const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1263
1264         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1265         const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1266
1267         // Start recording commands
1268
1269         beginCommandBuffer(vk, *cmdBuffer);
1270
1271         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1272         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1273
1274         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1275         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1276         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1277
1278         endCommandBuffer(vk, *cmdBuffer);
1279
1280         // Wait for completion
1281
1282         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1283
1284         // Validate the results
1285
1286         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1287         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1288
1289         const tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(outputBufferAllocation.getHostPtr());
1290         const tcu::UVec4* refBufferPtr = static_cast<tcu::UVec4*>(inputBuffer.getAllocation().getHostPtr());
1291
1292         for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1293         {
1294                 const deUint32 res = bufferPtr[ndx].x();
1295                 const deUint32 ref = ~refBufferPtr[ndx].x();
1296
1297                 if (res != ref)
1298                 {
1299                         std::ostringstream msg;
1300                         msg << "Comparison failed for Output.values[" << ndx << "]";
1301                         return tcu::TestStatus::fail(msg.str());
1302                 }
1303         }
1304         return tcu::TestStatus::pass("Compute succeeded");
1305 }
1306
1307 class InvertSSBOInPlaceTest : public vkt::TestCase
1308 {
1309 public:
1310                                                 InvertSSBOInPlaceTest   (tcu::TestContext&      testCtx,
1311                                                                                                  const std::string&     name,
1312                                                                                                  const std::string&     description,
1313                                                                                                  const deUint32         numValues,
1314                                                                                                  const bool                     sized,
1315                                                                                                  const tcu::IVec3&      localSize,
1316                                                                                                  const tcu::IVec3&      workSize);
1317
1318
1319         void                            initPrograms                    (SourceCollections& sourceCollections) const;
1320         TestInstance*           createInstance                  (Context&                       context) const;
1321
1322 private:
1323         const deUint32          m_numValues;
1324         const bool                      m_sized;
1325         const tcu::IVec3        m_localSize;
1326         const tcu::IVec3        m_workSize;
1327 };
1328
1329 class InvertSSBOInPlaceTestInstance : public vkt::TestInstance
1330 {
1331 public:
1332                                                                         InvertSSBOInPlaceTestInstance   (Context&                       context,
1333                                                                                                                                          const deUint32         numValues,
1334                                                                                                                                          const tcu::IVec3&      localSize,
1335                                                                                                                                          const tcu::IVec3&      workSize);
1336
1337         tcu::TestStatus                                 iterate                                                 (void);
1338
1339 private:
1340         const deUint32                                  m_numValues;
1341         const tcu::IVec3                                m_localSize;
1342         const tcu::IVec3                                m_workSize;
1343 };
1344
1345 InvertSSBOInPlaceTest::InvertSSBOInPlaceTest (tcu::TestContext&         testCtx,
1346                                                                                           const std::string&    name,
1347                                                                                           const std::string&    description,
1348                                                                                           const deUint32                numValues,
1349                                                                                           const bool                    sized,
1350                                                                                           const tcu::IVec3&             localSize,
1351                                                                                           const tcu::IVec3&             workSize)
1352         : TestCase              (testCtx, name, description)
1353         , m_numValues   (numValues)
1354         , m_sized               (sized)
1355         , m_localSize   (localSize)
1356         , m_workSize    (workSize)
1357 {
1358         DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1359 }
1360
1361 void InvertSSBOInPlaceTest::initPrograms (SourceCollections& sourceCollections) const
1362 {
1363         std::ostringstream src;
1364         src << "#version 310 es\n"
1365                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1366                 << "layout(binding = 0) buffer InOut {\n"
1367                 << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1368                 << "} sb_inout;\n"
1369                 << "void main (void) {\n"
1370                 << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1371                 << "    uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
1372                 << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1373                 << "    uint offset          = numValuesPerInv*groupNdx;\n"
1374                 << "\n"
1375                 << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1376                 << "        sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
1377                 << "}\n";
1378
1379         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1380 }
1381
1382 TestInstance* InvertSSBOInPlaceTest::createInstance (Context& context) const
1383 {
1384         return new InvertSSBOInPlaceTestInstance(context, m_numValues, m_localSize, m_workSize);
1385 }
1386
1387 InvertSSBOInPlaceTestInstance::InvertSSBOInPlaceTestInstance (Context&                  context,
1388                                                                                                                           const deUint32        numValues,
1389                                                                                                                           const tcu::IVec3&     localSize,
1390                                                                                                                           const tcu::IVec3&     workSize)
1391         : TestInstance  (context)
1392         , m_numValues   (numValues)
1393         , m_localSize   (localSize)
1394         , m_workSize    (workSize)
1395 {
1396 }
1397
1398 tcu::TestStatus InvertSSBOInPlaceTestInstance::iterate (void)
1399 {
1400         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1401         const VkDevice                  device                          = m_context.getDevice();
1402         const VkQueue                   queue                           = m_context.getUniversalQueue();
1403         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1404         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1405
1406         // Create an input/output buffer
1407
1408         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1409         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1410
1411         // Fill the buffer with data
1412
1413         typedef std::vector<deUint32> data_vector_t;
1414         data_vector_t inputData(m_numValues);
1415
1416         {
1417                 de::Random rnd(0x82ce7f);
1418                 const Allocation& bufferAllocation = buffer.getAllocation();
1419                 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1420                 for (deUint32 i = 0; i < m_numValues; ++i)
1421                         inputData[i] = *bufferPtr++ = rnd.getUint32();
1422
1423                 flushMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1424         }
1425
1426         // Create descriptor set
1427
1428         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1429                 DescriptorSetLayoutBuilder()
1430                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1431                 .build(vk, device));
1432
1433         const Unique<VkDescriptorPool> descriptorPool(
1434                 DescriptorPoolBuilder()
1435                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1436                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1437
1438         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1439
1440         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
1441         DescriptorSetUpdateBuilder()
1442                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
1443                 .update(vk, device);
1444
1445         // Perform the computation
1446
1447         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1448         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1449         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1450
1451         const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1452
1453         const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1454
1455         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1456         const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1457
1458         // Start recording commands
1459
1460         beginCommandBuffer(vk, *cmdBuffer);
1461
1462         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1463         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1464
1465         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1466         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1467         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1468
1469         endCommandBuffer(vk, *cmdBuffer);
1470
1471         // Wait for completion
1472
1473         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1474
1475         // Validate the results
1476
1477         const Allocation& bufferAllocation = buffer.getAllocation();
1478         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1479
1480         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1481
1482         for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1483         {
1484                 const deUint32 res = bufferPtr[ndx];
1485                 const deUint32 ref = ~inputData[ndx];
1486
1487                 if (res != ref)
1488                 {
1489                         std::ostringstream msg;
1490                         msg << "Comparison failed for InOut.values[" << ndx << "]";
1491                         return tcu::TestStatus::fail(msg.str());
1492                 }
1493         }
1494         return tcu::TestStatus::pass("Compute succeeded");
1495 }
1496
1497 class WriteToMultipleSSBOTest : public vkt::TestCase
1498 {
1499 public:
1500                                                 WriteToMultipleSSBOTest (tcu::TestContext&      testCtx,
1501                                                                                                  const std::string&     name,
1502                                                                                                  const std::string&     description,
1503                                                                                                  const deUint32         numValues,
1504                                                                                                  const bool                     sized,
1505                                                                                                  const tcu::IVec3&      localSize,
1506                                                                                                  const tcu::IVec3&      workSize);
1507
1508         void                            initPrograms                    (SourceCollections& sourceCollections) const;
1509         TestInstance*           createInstance                  (Context&                       context) const;
1510
1511 private:
1512         const deUint32          m_numValues;
1513         const bool                      m_sized;
1514         const tcu::IVec3        m_localSize;
1515         const tcu::IVec3        m_workSize;
1516 };
1517
1518 class WriteToMultipleSSBOTestInstance : public vkt::TestInstance
1519 {
1520 public:
1521                                                                         WriteToMultipleSSBOTestInstance (Context&                       context,
1522                                                                                                                                          const deUint32         numValues,
1523                                                                                                                                          const tcu::IVec3&      localSize,
1524                                                                                                                                          const tcu::IVec3&      workSize);
1525
1526         tcu::TestStatus                                 iterate                                                 (void);
1527
1528 private:
1529         const deUint32                                  m_numValues;
1530         const tcu::IVec3                                m_localSize;
1531         const tcu::IVec3                                m_workSize;
1532 };
1533
1534 WriteToMultipleSSBOTest::WriteToMultipleSSBOTest (tcu::TestContext&             testCtx,
1535                                                                                                   const std::string&    name,
1536                                                                                                   const std::string&    description,
1537                                                                                                   const deUint32                numValues,
1538                                                                                                   const bool                    sized,
1539                                                                                                   const tcu::IVec3&             localSize,
1540                                                                                                   const tcu::IVec3&             workSize)
1541         : TestCase              (testCtx, name, description)
1542         , m_numValues   (numValues)
1543         , m_sized               (sized)
1544         , m_localSize   (localSize)
1545         , m_workSize    (workSize)
1546 {
1547         DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1548 }
1549
1550 void WriteToMultipleSSBOTest::initPrograms (SourceCollections& sourceCollections) const
1551 {
1552         std::ostringstream src;
1553         src << "#version 310 es\n"
1554                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1555                 << "layout(binding = 0) writeonly buffer Out0 {\n"
1556                 << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1557                 << "} sb_out0;\n"
1558                 << "layout(binding = 1) writeonly buffer Out1 {\n"
1559                 << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1560                 << "} sb_out1;\n"
1561                 << "void main (void) {\n"
1562                 << "    uvec3 size      = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1563                 << "    uint groupNdx   = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1564                 << "\n"
1565                 << "    {\n"
1566                 << "        uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
1567                 << "        uint offset          = numValuesPerInv*groupNdx;\n"
1568                 << "\n"
1569                 << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1570                 << "            sb_out0.values[offset + ndx] = offset + ndx;\n"
1571                 << "    }\n"
1572                 << "    {\n"
1573                 << "        uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
1574                 << "        uint offset          = numValuesPerInv*groupNdx;\n"
1575                 << "\n"
1576                 << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1577                 << "            sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
1578                 << "    }\n"
1579                 << "}\n";
1580
1581         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1582 }
1583
1584 TestInstance* WriteToMultipleSSBOTest::createInstance (Context& context) const
1585 {
1586         return new WriteToMultipleSSBOTestInstance(context, m_numValues, m_localSize, m_workSize);
1587 }
1588
1589 WriteToMultipleSSBOTestInstance::WriteToMultipleSSBOTestInstance (Context&                      context,
1590                                                                                                                                   const deUint32        numValues,
1591                                                                                                                                   const tcu::IVec3&     localSize,
1592                                                                                                                                   const tcu::IVec3&     workSize)
1593         : TestInstance  (context)
1594         , m_numValues   (numValues)
1595         , m_localSize   (localSize)
1596         , m_workSize    (workSize)
1597 {
1598 }
1599
1600 tcu::TestStatus WriteToMultipleSSBOTestInstance::iterate (void)
1601 {
1602         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1603         const VkDevice                  device                          = m_context.getDevice();
1604         const VkQueue                   queue                           = m_context.getUniversalQueue();
1605         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1606         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1607
1608         // Create two output buffers
1609
1610         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1611         const Buffer buffer0(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1612         const Buffer buffer1(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1613
1614         // Create descriptor set
1615
1616         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1617                 DescriptorSetLayoutBuilder()
1618                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1619                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1620                 .build(vk, device));
1621
1622         const Unique<VkDescriptorPool> descriptorPool(
1623                 DescriptorPoolBuilder()
1624                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1625                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1626
1627         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1628
1629         const VkDescriptorBufferInfo buffer0DescriptorInfo = makeDescriptorBufferInfo(*buffer0, 0ull, bufferSizeBytes);
1630         const VkDescriptorBufferInfo buffer1DescriptorInfo = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
1631         DescriptorSetUpdateBuilder()
1632                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer0DescriptorInfo)
1633                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer1DescriptorInfo)
1634                 .update(vk, device);
1635
1636         // Perform the computation
1637
1638         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1639         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1640         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1641
1642         const VkBufferMemoryBarrier shaderWriteBarriers[] =
1643         {
1644                 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer0, 0ull, bufferSizeBytes),
1645                 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes)
1646         };
1647
1648         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1649         const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1650
1651         // Start recording commands
1652
1653         beginCommandBuffer(vk, *cmdBuffer);
1654
1655         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1656         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1657
1658         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1659         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, DE_LENGTH_OF_ARRAY(shaderWriteBarriers), shaderWriteBarriers, 0, (const VkImageMemoryBarrier*)DE_NULL);
1660
1661         endCommandBuffer(vk, *cmdBuffer);
1662
1663         // Wait for completion
1664
1665         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1666
1667         // Validate the results
1668         {
1669                 const Allocation& buffer0Allocation = buffer0.getAllocation();
1670                 invalidateMappedMemoryRange(vk, device, buffer0Allocation.getMemory(), buffer0Allocation.getOffset(), bufferSizeBytes);
1671                 const deUint32* buffer0Ptr = static_cast<deUint32*>(buffer0Allocation.getHostPtr());
1672
1673                 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1674                 {
1675                         const deUint32 res = buffer0Ptr[ndx];
1676                         const deUint32 ref = ndx;
1677
1678                         if (res != ref)
1679                         {
1680                                 std::ostringstream msg;
1681                                 msg << "Comparison failed for Out0.values[" << ndx << "] res=" << res << " ref=" << ref;
1682                                 return tcu::TestStatus::fail(msg.str());
1683                         }
1684                 }
1685         }
1686         {
1687                 const Allocation& buffer1Allocation = buffer1.getAllocation();
1688                 invalidateMappedMemoryRange(vk, device, buffer1Allocation.getMemory(), buffer1Allocation.getOffset(), bufferSizeBytes);
1689                 const deUint32* buffer1Ptr = static_cast<deUint32*>(buffer1Allocation.getHostPtr());
1690
1691                 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1692                 {
1693                         const deUint32 res = buffer1Ptr[ndx];
1694                         const deUint32 ref = m_numValues - ndx;
1695
1696                         if (res != ref)
1697                         {
1698                                 std::ostringstream msg;
1699                                 msg << "Comparison failed for Out1.values[" << ndx << "] res=" << res << " ref=" << ref;
1700                                 return tcu::TestStatus::fail(msg.str());
1701                         }
1702                 }
1703         }
1704         return tcu::TestStatus::pass("Compute succeeded");
1705 }
1706
1707 class SSBOBarrierTest : public vkt::TestCase
1708 {
1709 public:
1710                                                 SSBOBarrierTest         (tcu::TestContext&      testCtx,
1711                                                                                          const std::string&     name,
1712                                                                                          const std::string&     description,
1713                                                                                          const tcu::IVec3&      workSize);
1714
1715         void                            initPrograms            (SourceCollections& sourceCollections) const;
1716         TestInstance*           createInstance          (Context&                       context) const;
1717
1718 private:
1719         const tcu::IVec3        m_workSize;
1720 };
1721
1722 class SSBOBarrierTestInstance : public vkt::TestInstance
1723 {
1724 public:
1725                                                                         SSBOBarrierTestInstance         (Context&                       context,
1726                                                                                                                                  const tcu::IVec3&      workSize);
1727
1728         tcu::TestStatus                                 iterate                                         (void);
1729
1730 private:
1731         const tcu::IVec3                                m_workSize;
1732 };
1733
1734 SSBOBarrierTest::SSBOBarrierTest (tcu::TestContext&             testCtx,
1735                                                                   const std::string&    name,
1736                                                                   const std::string&    description,
1737                                                                   const tcu::IVec3&             workSize)
1738         : TestCase              (testCtx, name, description)
1739         , m_workSize    (workSize)
1740 {
1741 }
1742
1743 void SSBOBarrierTest::initPrograms (SourceCollections& sourceCollections) const
1744 {
1745         sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
1746                 "#version 310 es\n"
1747                 "layout (local_size_x = 1) in;\n"
1748                 "layout(binding = 2) readonly uniform Constants {\n"
1749                 "    uint u_baseVal;\n"
1750                 "};\n"
1751                 "layout(binding = 1) writeonly buffer Output {\n"
1752                 "    uint values[];\n"
1753                 "};\n"
1754                 "void main (void) {\n"
1755                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1756                 "    values[offset] = u_baseVal + offset;\n"
1757                 "}\n");
1758
1759         sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
1760                 "#version 310 es\n"
1761                 "layout (local_size_x = 1) in;\n"
1762                 "layout(binding = 1) readonly buffer Input {\n"
1763                 "    uint values[];\n"
1764                 "};\n"
1765                 "layout(binding = 0) coherent buffer Output {\n"
1766                 "    uint sum;\n"
1767                 "};\n"
1768                 "void main (void) {\n"
1769                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1770                 "    uint value  = values[offset];\n"
1771                 "    atomicAdd(sum, value);\n"
1772                 "}\n");
1773 }
1774
1775 TestInstance* SSBOBarrierTest::createInstance (Context& context) const
1776 {
1777         return new SSBOBarrierTestInstance(context, m_workSize);
1778 }
1779
1780 SSBOBarrierTestInstance::SSBOBarrierTestInstance (Context& context, const tcu::IVec3& workSize)
1781         : TestInstance  (context)
1782         , m_workSize    (workSize)
1783 {
1784 }
1785
1786 tcu::TestStatus SSBOBarrierTestInstance::iterate (void)
1787 {
1788         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1789         const VkDevice                  device                          = m_context.getDevice();
1790         const VkQueue                   queue                           = m_context.getUniversalQueue();
1791         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1792         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1793
1794         // Create a work buffer used by both shaders
1795
1796         const int workGroupCount = multiplyComponents(m_workSize);
1797         const VkDeviceSize workBufferSizeBytes = sizeof(deUint32) * workGroupCount;
1798         const Buffer workBuffer(vk, device, allocator, makeBufferCreateInfo(workBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::Any);
1799
1800         // Create an output buffer
1801
1802         const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
1803         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1804
1805         // Initialize atomic counter value to zero
1806         {
1807                 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1808                 deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1809                 *outputBufferPtr = 0;
1810                 flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1811         }
1812
1813         // Create a uniform buffer (to pass uniform constants)
1814
1815         const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
1816         const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
1817
1818         // Set the constants in the uniform buffer
1819
1820         const deUint32  baseValue = 127;
1821         {
1822                 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
1823                 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
1824                 uniformBufferPtr[0] = baseValue;
1825
1826                 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
1827         }
1828
1829         // Create descriptor set
1830
1831         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1832                 DescriptorSetLayoutBuilder()
1833                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1834                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1835                 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1836                 .build(vk, device));
1837
1838         const Unique<VkDescriptorPool> descriptorPool(
1839                 DescriptorPoolBuilder()
1840                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1841                 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1842                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1843
1844         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1845
1846         const VkDescriptorBufferInfo workBufferDescriptorInfo = makeDescriptorBufferInfo(*workBuffer, 0ull, workBufferSizeBytes);
1847         const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
1848         const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
1849         DescriptorSetUpdateBuilder()
1850                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1851                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &workBufferDescriptorInfo)
1852                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
1853                 .update(vk, device);
1854
1855         // Perform the computation
1856
1857         const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
1858         const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
1859
1860         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1861         const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
1862         const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
1863
1864         const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
1865
1866         const VkBufferMemoryBarrier betweenShadersBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *workBuffer, 0ull, workBufferSizeBytes);
1867
1868         const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
1869
1870         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1871         const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1872
1873         // Start recording commands
1874
1875         beginCommandBuffer(vk, *cmdBuffer);
1876
1877         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
1878         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1879
1880         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1881
1882         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1883         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &betweenShadersBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1884
1885         // Switch to the second shader program
1886         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
1887
1888         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1889         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1890
1891         endCommandBuffer(vk, *cmdBuffer);
1892
1893         // Wait for completion
1894
1895         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1896
1897         // Validate the results
1898
1899         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1900         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1901
1902         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1903         const deUint32  res = *bufferPtr;
1904         deUint32                ref = 0;
1905
1906         for (int ndx = 0; ndx < workGroupCount; ++ndx)
1907                 ref += baseValue + ndx;
1908
1909         if (res != ref)
1910         {
1911                 std::ostringstream msg;
1912                 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
1913                 return tcu::TestStatus::fail(msg.str());
1914         }
1915         return tcu::TestStatus::pass("Compute succeeded");
1916 }
1917
1918 class ImageAtomicOpTest : public vkt::TestCase
1919 {
1920 public:
1921                                                 ImageAtomicOpTest               (tcu::TestContext&      testCtx,
1922                                                                                                  const std::string& name,
1923                                                                                                  const std::string& description,
1924                                                                                                  const deUint32         localSize,
1925                                                                                                  const tcu::IVec2&      imageSize);
1926
1927         void                            initPrograms                    (SourceCollections& sourceCollections) const;
1928         TestInstance*           createInstance                  (Context&                       context) const;
1929
1930 private:
1931         const deUint32          m_localSize;
1932         const tcu::IVec2        m_imageSize;
1933 };
1934
1935 class ImageAtomicOpTestInstance : public vkt::TestInstance
1936 {
1937 public:
1938                                                                         ImageAtomicOpTestInstance               (Context&                       context,
1939                                                                                                                                          const deUint32         localSize,
1940                                                                                                                                          const tcu::IVec2&      imageSize);
1941
1942         tcu::TestStatus                                 iterate                                                 (void);
1943
1944 private:
1945         const deUint32                                  m_localSize;
1946         const tcu::IVec2                                m_imageSize;
1947 };
1948
1949 ImageAtomicOpTest::ImageAtomicOpTest (tcu::TestContext&         testCtx,
1950                                                                           const std::string&    name,
1951                                                                           const std::string&    description,
1952                                                                           const deUint32                localSize,
1953                                                                           const tcu::IVec2&             imageSize)
1954         : TestCase              (testCtx, name, description)
1955         , m_localSize   (localSize)
1956         , m_imageSize   (imageSize)
1957 {
1958 }
1959
1960 void ImageAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
1961 {
1962         std::ostringstream src;
1963         src << "#version 310 es\n"
1964                 << "#extension GL_OES_shader_image_atomic : require\n"
1965                 << "layout (local_size_x = " << m_localSize << ") in;\n"
1966                 << "layout(binding = 1, r32ui) coherent uniform highp uimage2D u_dstImg;\n"
1967                 << "layout(binding = 0) readonly buffer Input {\n"
1968                 << "    uint values[" << (multiplyComponents(m_imageSize) * m_localSize) << "];\n"
1969                 << "} sb_in;\n\n"
1970                 << "void main (void) {\n"
1971                 << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1972                 << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1973                 << "\n"
1974                 << "    if (gl_LocalInvocationIndex == 0u)\n"
1975                 << "        imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1976                 << "    memoryBarrierImage();\n"
1977                 << "    barrier();\n"
1978                 << "    imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
1979                 << "}\n";
1980
1981         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1982 }
1983
1984 TestInstance* ImageAtomicOpTest::createInstance (Context& context) const
1985 {
1986         return new ImageAtomicOpTestInstance(context, m_localSize, m_imageSize);
1987 }
1988
1989 ImageAtomicOpTestInstance::ImageAtomicOpTestInstance (Context& context, const deUint32 localSize, const tcu::IVec2& imageSize)
1990         : TestInstance  (context)
1991         , m_localSize   (localSize)
1992         , m_imageSize   (imageSize)
1993 {
1994 }
1995
1996 tcu::TestStatus ImageAtomicOpTestInstance::iterate (void)
1997 {
1998         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1999         const VkDevice                  device                          = m_context.getDevice();
2000         const VkQueue                   queue                           = m_context.getUniversalQueue();
2001         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
2002         Allocator&                              allocator                       = m_context.getDefaultAllocator();
2003
2004         // Create an image
2005
2006         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
2007         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2008
2009         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2010         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2011
2012         // Input buffer
2013
2014         const deUint32 numInputValues = multiplyComponents(m_imageSize) * m_localSize;
2015         const VkDeviceSize inputBufferSizeBytes = sizeof(deUint32) * numInputValues;
2016
2017         const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(inputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2018
2019         // Populate the input buffer with test data
2020         {
2021                 de::Random rnd(0x77238ac2);
2022                 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
2023                 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
2024                 for (deUint32 i = 0; i < numInputValues; ++i)
2025                         *bufferPtr++ = rnd.getUint32();
2026
2027                 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), inputBufferSizeBytes);
2028         }
2029
2030         // Create a buffer to store shader output (copied from image data)
2031
2032         const deUint32 imageArea = multiplyComponents(m_imageSize);
2033         const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32) * imageArea;
2034         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
2035
2036         // Create descriptor set
2037
2038         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2039                 DescriptorSetLayoutBuilder()
2040                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2041                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2042                 .build(vk, device));
2043
2044         const Unique<VkDescriptorPool> descriptorPool(
2045                 DescriptorPoolBuilder()
2046                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2047                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2048                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2049
2050         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2051
2052         // Set the bindings
2053
2054         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2055         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, inputBufferSizeBytes);
2056
2057         DescriptorSetUpdateBuilder()
2058                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2059                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2060                 .update(vk, device);
2061
2062         // Perform the computation
2063         {
2064                 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
2065                 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2066                 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2067
2068                 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, inputBufferSizeBytes);
2069
2070                 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2071                         (VkAccessFlags)0, VK_ACCESS_SHADER_WRITE_BIT,
2072                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
2073                         *image, subresourceRange);
2074
2075                 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
2076                         VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
2077                         VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2078                         *image, subresourceRange);
2079
2080                 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2081
2082                 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
2083
2084                 // Prepare the command buffer
2085
2086                 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2087                 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2088
2089                 // Start recording commands
2090
2091                 beginCommandBuffer(vk, *cmdBuffer);
2092
2093                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2094                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2095
2096                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
2097                 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2098
2099                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
2100                 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &copyParams);
2101                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2102
2103                 endCommandBuffer(vk, *cmdBuffer);
2104
2105                 // Wait for completion
2106
2107                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2108         }
2109
2110         // Validate the results
2111
2112         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2113         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2114
2115         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2116         const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
2117
2118         for (deUint32 pixelNdx = 0; pixelNdx < imageArea; ++pixelNdx)
2119         {
2120                 const deUint32  res = bufferPtr[pixelNdx];
2121                 deUint32                ref = 0;
2122
2123                 for (deUint32 offs = 0; offs < m_localSize; ++offs)
2124                         ref += refBufferPtr[pixelNdx * m_localSize + offs];
2125
2126                 if (res != ref)
2127                 {
2128                         std::ostringstream msg;
2129                         msg << "Comparison failed for pixel " << pixelNdx;
2130                         return tcu::TestStatus::fail(msg.str());
2131                 }
2132         }
2133         return tcu::TestStatus::pass("Compute succeeded");
2134 }
2135
2136 class ImageBarrierTest : public vkt::TestCase
2137 {
2138 public:
2139                                                 ImageBarrierTest        (tcu::TestContext&      testCtx,
2140                                                                                         const std::string&      name,
2141                                                                                         const std::string&      description,
2142                                                                                         const tcu::IVec2&       imageSize);
2143
2144         void                            initPrograms            (SourceCollections& sourceCollections) const;
2145         TestInstance*           createInstance          (Context&                       context) const;
2146
2147 private:
2148         const tcu::IVec2        m_imageSize;
2149 };
2150
2151 class ImageBarrierTestInstance : public vkt::TestInstance
2152 {
2153 public:
2154                                                                         ImageBarrierTestInstance        (Context&                       context,
2155                                                                                                                                  const tcu::IVec2&      imageSize);
2156
2157         tcu::TestStatus                                 iterate                                         (void);
2158
2159 private:
2160         const tcu::IVec2                                m_imageSize;
2161 };
2162
2163 ImageBarrierTest::ImageBarrierTest (tcu::TestContext&   testCtx,
2164                                                                         const std::string&      name,
2165                                                                         const std::string&      description,
2166                                                                         const tcu::IVec2&       imageSize)
2167         : TestCase              (testCtx, name, description)
2168         , m_imageSize   (imageSize)
2169 {
2170 }
2171
2172 void ImageBarrierTest::initPrograms (SourceCollections& sourceCollections) const
2173 {
2174         sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
2175                 "#version 310 es\n"
2176                 "layout (local_size_x = 1) in;\n"
2177                 "layout(binding = 2) readonly uniform Constants {\n"
2178                 "    uint u_baseVal;\n"
2179                 "};\n"
2180                 "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_img;\n"
2181                 "void main (void) {\n"
2182                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
2183                 "    imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset + u_baseVal, 0, 0, 0));\n"
2184                 "}\n");
2185
2186         sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
2187                 "#version 310 es\n"
2188                 "layout (local_size_x = 1) in;\n"
2189                 "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_img;\n"
2190                 "layout(binding = 0) coherent buffer Output {\n"
2191                 "    uint sum;\n"
2192                 "};\n"
2193                 "void main (void) {\n"
2194                 "    uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
2195                 "    atomicAdd(sum, value);\n"
2196                 "}\n");
2197 }
2198
2199 TestInstance* ImageBarrierTest::createInstance (Context& context) const
2200 {
2201         return new ImageBarrierTestInstance(context, m_imageSize);
2202 }
2203
2204 ImageBarrierTestInstance::ImageBarrierTestInstance (Context& context, const tcu::IVec2& imageSize)
2205         : TestInstance  (context)
2206         , m_imageSize   (imageSize)
2207 {
2208 }
2209
2210 tcu::TestStatus ImageBarrierTestInstance::iterate (void)
2211 {
2212         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
2213         const VkDevice                  device                          = m_context.getDevice();
2214         const VkQueue                   queue                           = m_context.getUniversalQueue();
2215         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
2216         Allocator&                              allocator                       = m_context.getDefaultAllocator();
2217
2218         // Create an image used by both shaders
2219
2220         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_STORAGE_BIT);
2221         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2222
2223         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2224         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2225
2226         // Create an output buffer
2227
2228         const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
2229         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2230
2231         // Initialize atomic counter value to zero
2232         {
2233                 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2234                 deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2235                 *outputBufferPtr = 0;
2236                 flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2237         }
2238
2239         // Create a uniform buffer (to pass uniform constants)
2240
2241         const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
2242         const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
2243
2244         // Set the constants in the uniform buffer
2245
2246         const deUint32  baseValue = 127;
2247         {
2248                 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
2249                 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
2250                 uniformBufferPtr[0] = baseValue;
2251
2252                 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
2253         }
2254
2255         // Create descriptor set
2256
2257         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2258                 DescriptorSetLayoutBuilder()
2259                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2260                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2261                 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2262                 .build(vk, device));
2263
2264         const Unique<VkDescriptorPool> descriptorPool(
2265                 DescriptorPoolBuilder()
2266                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2267                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2268                 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2269                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2270
2271         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2272
2273         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2274         const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
2275         const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2276         DescriptorSetUpdateBuilder()
2277                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
2278                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2279                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2280                 .update(vk, device);
2281
2282         // Perform the computation
2283
2284         const Unique<VkShaderModule>    shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
2285         const Unique<VkShaderModule>    shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
2286
2287         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2288         const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
2289         const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
2290
2291         const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
2292
2293         const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2294                 0u, 0u,
2295                 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
2296                 *image, subresourceRange);
2297
2298         const VkImageMemoryBarrier imageBarrierBetweenShaders = makeImageMemoryBarrier(
2299                 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
2300                 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2301                 *image, subresourceRange);
2302
2303         const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2304
2305         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2306         const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2307
2308         // Start recording commands
2309
2310         beginCommandBuffer(vk, *cmdBuffer);
2311
2312         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
2313         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2314
2315         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 1, &imageLayoutBarrier);
2316
2317         vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2318         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imageBarrierBetweenShaders);
2319
2320         // Switch to the second shader program
2321         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
2322
2323         vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2324         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2325
2326         endCommandBuffer(vk, *cmdBuffer);
2327
2328         // Wait for completion
2329
2330         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2331
2332         // Validate the results
2333
2334         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2335         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2336
2337         const int               numValues = multiplyComponents(m_imageSize);
2338         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2339         const deUint32  res = *bufferPtr;
2340         deUint32                ref = 0;
2341
2342         for (int ndx = 0; ndx < numValues; ++ndx)
2343                 ref += baseValue + ndx;
2344
2345         if (res != ref)
2346         {
2347                 std::ostringstream msg;
2348                 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
2349                 return tcu::TestStatus::fail(msg.str());
2350         }
2351         return tcu::TestStatus::pass("Compute succeeded");
2352 }
2353
2354 namespace EmptyShaderTest
2355 {
2356
2357 void createProgram (SourceCollections& dst)
2358 {
2359         dst.glslSources.add("comp") << glu::ComputeSource(
2360                 "#version 310 es\n"
2361                 "layout (local_size_x = 1) in;\n"
2362                 "void main (void) {}\n"
2363         );
2364 }
2365
2366 tcu::TestStatus createTest (Context& context)
2367 {
2368         const DeviceInterface&  vk                                      = context.getDeviceInterface();
2369         const VkDevice                  device                          = context.getDevice();
2370         const VkQueue                   queue                           = context.getUniversalQueue();
2371         const deUint32                  queueFamilyIndex        = context.getUniversalQueueFamilyIndex();
2372
2373         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0u));
2374
2375         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device));
2376         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2377
2378         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2379         const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2380
2381         // Start recording commands
2382
2383         beginCommandBuffer(vk, *cmdBuffer);
2384
2385         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2386
2387         const tcu::IVec3 workGroups(1, 1, 1);
2388         vk.cmdDispatch(*cmdBuffer, workGroups.x(), workGroups.y(), workGroups.z());
2389
2390         endCommandBuffer(vk, *cmdBuffer);
2391
2392         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2393
2394         return tcu::TestStatus::pass("Compute succeeded");
2395 }
2396
2397 } // EmptyShaderTest ns
2398 } // anonymous
2399
2400 tcu::TestCaseGroup* createBasicComputeShaderTests (tcu::TestContext& testCtx)
2401 {
2402         de::MovePtr<tcu::TestCaseGroup> basicComputeTests(new tcu::TestCaseGroup(testCtx, "basic", "Basic compute tests"));
2403
2404         addFunctionCaseWithPrograms(basicComputeTests.get(), "empty_shader", "Shader that does nothing", EmptyShaderTest::createProgram, EmptyShaderTest::createTest);
2405
2406         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_single_invocation",        "Copy from UBO to SSBO, inverting bits",        256,    tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2407         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_single_group",                     "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(2,1,4),      tcu::IVec3(1,1,1)));
2408         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_multiple_invocations",     "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(1,1,1),      tcu::IVec3(2,4,1)));
2409         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_multiple_groups",          "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2410
2411         basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,       "copy_ssbo_single_invocation",          "Copy between SSBOs, inverting bits",   256,    tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2412         basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,       "copy_ssbo_multiple_invocations",       "Copy between SSBOs, inverting bits",   1024,   tcu::IVec3(1,1,1),      tcu::IVec3(2,4,1)));
2413         basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,       "copy_ssbo_multiple_groups",            "Copy between SSBOs, inverting bits",   1024,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2414
2415         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_rw_single_invocation",                    "Read and write same SSBO",             256,    true,   tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2416         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_rw_multiple_groups",                              "Read and write same SSBO",             1024,   true,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2417         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_unsized_arr_single_invocation",   "Read and write same SSBO",             256,    false,  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2418         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_unsized_arr_multiple_groups",             "Read and write same SSBO",             1024,   false,  tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2419
2420         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_arr_single_invocation",                 "Write to multiple SSBOs",      256,    true,   tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2421         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_arr_multiple_groups",                   "Write to multiple SSBOs",      1024,   true,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2422         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_unsized_arr_single_invocation", "Write to multiple SSBOs",      256,    false,  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2423         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_unsized_arr_multiple_groups",   "Write to multiple SSBOs",      1024,   false,  tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2424
2425         basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,   "ssbo_local_barrier_single_invocation", "SSBO local barrier usage",     tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2426         basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,   "ssbo_local_barrier_single_group",              "SSBO local barrier usage",     tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
2427         basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,   "ssbo_local_barrier_multiple_groups",   "SSBO local barrier usage",     tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
2428
2429         basicComputeTests->addChild(new SSBOBarrierTest(testCtx,        "ssbo_cmd_barrier_single",              "SSBO memory barrier usage",    tcu::IVec3(1,1,1)));
2430         basicComputeTests->addChild(new SSBOBarrierTest(testCtx,        "ssbo_cmd_barrier_multiple",    "SSBO memory barrier usage",    tcu::IVec3(11,5,7)));
2431
2432         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_single_invocation",         "Basic shared variable usage",  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2433         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_single_group",                      "Basic shared variable usage",  tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
2434         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_multiple_invocations",      "Basic shared variable usage",  tcu::IVec3(1,1,1),      tcu::IVec3(2,5,4)));
2435         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_multiple_groups",           "Basic shared variable usage",  tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
2436
2437         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_single_invocation",           "Atomic operation with shared var",             tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2438         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_single_group",                        "Atomic operation with shared var",             tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
2439         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_multiple_invocations",        "Atomic operation with shared var",             tcu::IVec3(1,1,1),      tcu::IVec3(2,5,4)));
2440         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_multiple_groups",                     "Atomic operation with shared var",             tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
2441
2442         basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx,    "copy_image_to_ssbo_small",     "Image to SSBO copy",   tcu::IVec2(1,1),        tcu::IVec2(64,64)));
2443         basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx,    "copy_image_to_ssbo_large",     "Image to SSBO copy",   tcu::IVec2(2,4),        tcu::IVec2(512,512)));
2444
2445         basicComputeTests->addChild(new CopySSBOToImageTest(testCtx,    "copy_ssbo_to_image_small",     "SSBO to image copy",   tcu::IVec2(1, 1),       tcu::IVec2(64, 64)));
2446         basicComputeTests->addChild(new CopySSBOToImageTest(testCtx,    "copy_ssbo_to_image_large",     "SSBO to image copy",   tcu::IVec2(2, 4),       tcu::IVec2(512, 512)));
2447
2448         basicComputeTests->addChild(new ImageAtomicOpTest(testCtx,      "image_atomic_op_local_size_1", "Atomic operation with image",  1,      tcu::IVec2(64,64)));
2449         basicComputeTests->addChild(new ImageAtomicOpTest(testCtx,      "image_atomic_op_local_size_8", "Atomic operation with image",  8,      tcu::IVec2(64,64)));
2450
2451         basicComputeTests->addChild(new ImageBarrierTest(testCtx,       "image_barrier_single",         "Image barrier",        tcu::IVec2(1,1)));
2452         basicComputeTests->addChild(new ImageBarrierTest(testCtx,       "image_barrier_multiple",       "Image barrier",        tcu::IVec2(64,64)));
2453
2454         return basicComputeTests.release();
2455 }
2456
2457 } // compute
2458 } // vkt