am 019fca93: (-s ours) am 8d2a27e0: Remove problematic srgb8_alpha8_astc copy image...
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / compute / vktComputeBasicComputeShaderTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 Google Inc.
6  * Copyright (c) 2015 Mobica Ltd.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and/or associated documentation files (the
10  * "Materials"), to deal in the Materials without restriction, including
11  * without limitation the rights to use, copy, modify, merge, publish,
12  * distribute, sublicense, and/or sell copies of the Materials, and to
13  * permit persons to whom the Materials are furnished to do so, subject to
14  * the following conditions:
15  *
16  * The above copyright notice(s) and this permission notice shall be included
17  * in all copies or substantial portions of the Materials.
18  *
19  * The Materials are Confidential Information as defined by the
20  * Khronos Membership Agreement until designated non-confidential by Khronos,
21  * at which point this condition clause shall be removed.
22  *
23  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
26  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
27  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
28  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
29  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
30  *
31  *//*!
32  * \file
33  * \brief Compute Shader Tests
34  *//*--------------------------------------------------------------------*/
35
36 #include "vktComputeBasicComputeShaderTests.hpp"
37 #include "vktTestCase.hpp"
38 #include "vktTestCaseUtil.hpp"
39 #include "vktComputeTestsUtil.hpp"
40
41 #include "vkDefs.hpp"
42 #include "vkRef.hpp"
43 #include "vkRefUtil.hpp"
44 #include "vkPlatform.hpp"
45 #include "vkPrograms.hpp"
46 #include "vkRefUtil.hpp"
47 #include "vkMemUtil.hpp"
48 #include "vkQueryUtil.hpp"
49 #include "vkBuilderUtil.hpp"
50 #include "vkTypeUtil.hpp"
51
52 #include "deStringUtil.hpp"
53 #include "deUniquePtr.hpp"
54 #include "deRandom.hpp"
55
56 #include <vector>
57
58 using namespace vk;
59
60 namespace vkt
61 {
62 namespace compute
63 {
64 namespace
65 {
66
67 template<typename T, int size>
68 T multiplyComponents (const tcu::Vector<T, size>& v)
69 {
70         T accum = 1;
71         for (int i = 0; i < size; ++i)
72                 accum *= v[i];
73         return accum;
74 }
75
//! Returns \p a multiplied by itself.
template<typename T>
inline T squared (const T& a)
{
	T result = a * a;
	return result;
}
81
82 inline VkImageCreateInfo make2DImageCreateInfo (const tcu::IVec2& imageSize, const VkImageUsageFlags usage)
83 {
84         const VkImageCreateInfo imageParams =
85         {
86                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                            // VkStructureType                      sType;
87                 DE_NULL,                                                                                        // const void*                          pNext;
88                 0u,                                                                                                     // VkImageCreateFlags           flags;
89                 VK_IMAGE_TYPE_2D,                                                                       // VkImageType                          imageType;
90                 VK_FORMAT_R32_UINT,                                                                     // VkFormat                                     format;
91                 vk::makeExtent3D(imageSize.x(), imageSize.y(), 1),      // VkExtent3D                           extent;
92                 1u,                                                                                                     // deUint32                                     mipLevels;
93                 1u,                                                                                                     // deUint32                                     arrayLayers;
94                 VK_SAMPLE_COUNT_1_BIT,                                                          // VkSampleCountFlagBits        samples;
95                 VK_IMAGE_TILING_OPTIMAL,                                                        // VkImageTiling                        tiling;
96                 usage,                                                                                          // VkImageUsageFlags            usage;
97                 VK_SHARING_MODE_EXCLUSIVE,                                                      // VkSharingMode                        sharingMode;
98                 0u,                                                                                                     // deUint32                                     queueFamilyIndexCount;
99                 DE_NULL,                                                                                        // const deUint32*                      pQueueFamilyIndices;
100                 VK_IMAGE_LAYOUT_UNDEFINED,                                                      // VkImageLayout                        initialLayout;
101         };
102         return imageParams;
103 }
104
105 inline VkBufferImageCopy makeBufferImageCopy(const tcu::IVec2& imageSize)
106 {
107         return compute::makeBufferImageCopy(vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), 1u);
108 }
109
//! Kind of buffer selected by a test.
//! NOTE(review): no user of this enum is visible in this part of the file.
enum BufferType
{
	BUFFER_TYPE_UNIFORM	= 0,
	BUFFER_TYPE_SSBO	= 1,
};
115
116 class SharedVarTest : public vkt::TestCase
117 {
118 public:
119                                                 SharedVarTest   (tcu::TestContext&              testCtx,
120                                                                                  const std::string&             name,
121                                                                                  const std::string&             description,
122                                                                                  const tcu::IVec3&              localSize,
123                                                                                  const tcu::IVec3&              workSize);
124
125         void                            initPrograms    (SourceCollections&             sourceCollections) const;
126         TestInstance*           createInstance  (Context&                               context) const;
127
128 private:
129         const tcu::IVec3        m_localSize;
130         const tcu::IVec3        m_workSize;
131 };
132
133 class SharedVarTestInstance : public vkt::TestInstance
134 {
135 public:
136                                                                         SharedVarTestInstance   (Context&                       context,
137                                                                                                                          const tcu::IVec3&      localSize,
138                                                                                                                          const tcu::IVec3&      workSize);
139
140         tcu::TestStatus                                 iterate                                 (void);
141
142 private:
143         const tcu::IVec3                                m_localSize;
144         const tcu::IVec3                                m_workSize;
145 };
146
147 SharedVarTest::SharedVarTest (tcu::TestContext&         testCtx,
148                                                           const std::string&    name,
149                                                           const std::string&    description,
150                                                           const tcu::IVec3&             localSize,
151                                                           const tcu::IVec3&             workSize)
152         : TestCase              (testCtx, name, description)
153         , m_localSize   (localSize)
154         , m_workSize    (workSize)
155 {
156 }
157
158 void SharedVarTest::initPrograms (SourceCollections& sourceCollections) const
159 {
160         const int workGroupSize = multiplyComponents(m_localSize);
161         const int workGroupCount = multiplyComponents(m_workSize);
162         const int numValues = workGroupSize * workGroupCount;
163
164         std::ostringstream src;
165         src << "#version 310 es\n"
166                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
167                 << "layout(binding = 0) writeonly buffer Output {\n"
168                 << "    uint values[" << numValues << "];\n"
169                 << "} sb_out;\n\n"
170                 << "shared uint offsets[" << workGroupSize << "];\n\n"
171                 << "void main (void) {\n"
172                 << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
173                 << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
174                 << "    uint globalOffs = localSize*globalNdx;\n"
175                 << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
176                 << "\n"
177                 << "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
178                 << "    memoryBarrierShared();\n"
179                 << "    barrier();\n"
180                 << "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
181                 << "}\n";
182
183         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
184 }
185
186 TestInstance* SharedVarTest::createInstance (Context& context) const
187 {
188         return new SharedVarTestInstance(context, m_localSize, m_workSize);
189 }
190
191 SharedVarTestInstance::SharedVarTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
192         : TestInstance  (context)
193         , m_localSize   (localSize)
194         , m_workSize    (workSize)
195 {
196 }
197
198 tcu::TestStatus SharedVarTestInstance::iterate (void)
199 {
200         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
201         const VkDevice                  device                          = m_context.getDevice();
202         const VkQueue                   queue                           = m_context.getUniversalQueue();
203         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
204         Allocator&                              allocator                       = m_context.getDefaultAllocator();
205
206         const int workGroupSize = multiplyComponents(m_localSize);
207         const int workGroupCount = multiplyComponents(m_workSize);
208
209         // Create a buffer and host-visible memory for it
210
211         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
212         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
213
214         // Create descriptor set
215
216         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
217                 DescriptorSetLayoutBuilder()
218                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
219                 .build(vk, device));
220
221         const Unique<VkDescriptorPool> descriptorPool(
222                 DescriptorPoolBuilder()
223                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
224                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
225
226         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
227
228         const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
229         DescriptorSetUpdateBuilder()
230                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
231                 .update(vk, device);
232
233         // Perform the computation
234
235         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
236         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
237         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
238
239         const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
240         const void* barriers[] = { &computeFinishBarrier };
241
242         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
243         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
244
245         // Start recording commands
246
247         beginCommandBuffer(vk, *cmdBuffer);
248
249         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
250         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
251
252         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
253
254         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriers), barriers);
255
256         endCommandBuffer(vk, *cmdBuffer);
257
258         // Wait for completion
259
260         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
261
262         // Validate the results
263
264         const Allocation& bufferAllocation = buffer.getAllocation();
265         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
266
267         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
268
269         for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
270         {
271                 const int globalOffset = groupNdx * workGroupSize;
272                 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
273                 {
274                         const deUint32 res = bufferPtr[globalOffset + localOffset];
275                         const deUint32 ref = globalOffset + squared(workGroupSize - localOffset - 1);
276
277                         if (res != ref)
278                         {
279                                 std::ostringstream msg;
280                                 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
281                                 return tcu::TestStatus::fail(msg.str());
282                         }
283                 }
284         }
285         return tcu::TestStatus::pass("Compute succeeded");
286 }
287
288 class SharedVarAtomicOpTest : public vkt::TestCase
289 {
290 public:
291                                                 SharedVarAtomicOpTest   (tcu::TestContext&      testCtx,
292                                                                                                  const std::string&     name,
293                                                                                                  const std::string&     description,
294                                                                                                  const tcu::IVec3&      localSize,
295                                                                                                  const tcu::IVec3&      workSize);
296
297         void                            initPrograms                    (SourceCollections& sourceCollections) const;
298         TestInstance*           createInstance                  (Context&                       context) const;
299
300 private:
301         const tcu::IVec3        m_localSize;
302         const tcu::IVec3        m_workSize;
303 };
304
305 class SharedVarAtomicOpTestInstance : public vkt::TestInstance
306 {
307 public:
308                                                                         SharedVarAtomicOpTestInstance   (Context&                       context,
309                                                                                                                                          const tcu::IVec3&      localSize,
310                                                                                                                                          const tcu::IVec3&      workSize);
311
312         tcu::TestStatus                                 iterate                                                 (void);
313
314 private:
315         const tcu::IVec3                                m_localSize;
316         const tcu::IVec3                                m_workSize;
317 };
318
319 SharedVarAtomicOpTest::SharedVarAtomicOpTest (tcu::TestContext&         testCtx,
320                                                                                           const std::string&    name,
321                                                                                           const std::string&    description,
322                                                                                           const tcu::IVec3&             localSize,
323                                                                                           const tcu::IVec3&             workSize)
324         : TestCase              (testCtx, name, description)
325         , m_localSize   (localSize)
326         , m_workSize    (workSize)
327 {
328 }
329
330 void SharedVarAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
331 {
332         const int workGroupSize = multiplyComponents(m_localSize);
333         const int workGroupCount = multiplyComponents(m_workSize);
334         const int numValues = workGroupSize * workGroupCount;
335
336         std::ostringstream src;
337         src << "#version 310 es\n"
338                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
339                 << "layout(binding = 0) writeonly buffer Output {\n"
340                 << "    uint values[" << numValues << "];\n"
341                 << "} sb_out;\n\n"
342                 << "shared uint count;\n\n"
343                 << "void main (void) {\n"
344                 << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
345                 << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
346                 << "    uint globalOffs = localSize*globalNdx;\n"
347                 << "\n"
348                 << "    count = 0u;\n"
349                 << "    memoryBarrierShared();\n"
350                 << "    barrier();\n"
351                 << "    uint oldVal = atomicAdd(count, 1u);\n"
352                 << "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
353                 << "}\n";
354
355         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
356 }
357
358 TestInstance* SharedVarAtomicOpTest::createInstance (Context& context) const
359 {
360         return new SharedVarAtomicOpTestInstance(context, m_localSize, m_workSize);
361 }
362
363 SharedVarAtomicOpTestInstance::SharedVarAtomicOpTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
364         : TestInstance  (context)
365         , m_localSize   (localSize)
366         , m_workSize    (workSize)
367 {
368 }
369
370 tcu::TestStatus SharedVarAtomicOpTestInstance::iterate (void)
371 {
372         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
373         const VkDevice                  device                          = m_context.getDevice();
374         const VkQueue                   queue                           = m_context.getUniversalQueue();
375         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
376         Allocator&                              allocator                       = m_context.getDefaultAllocator();
377
378         const int workGroupSize = multiplyComponents(m_localSize);
379         const int workGroupCount = multiplyComponents(m_workSize);
380
381         // Create a buffer and host-visible memory for it
382
383         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
384         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
385
386         // Create descriptor set
387
388         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
389                 DescriptorSetLayoutBuilder()
390                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
391                 .build(vk, device));
392
393         const Unique<VkDescriptorPool> descriptorPool(
394                 DescriptorPoolBuilder()
395                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
396                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
397
398         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
399
400         const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
401         DescriptorSetUpdateBuilder()
402                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
403                 .update(vk, device);
404
405         // Perform the computation
406
407         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
408         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
409         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
410
411         const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
412         const void* barriers[] = { &computeFinishBarrier };
413
414         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
415         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
416
417         // Start recording commands
418
419         beginCommandBuffer(vk, *cmdBuffer);
420
421         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
422         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
423
424         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
425
426         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriers), barriers);
427
428         endCommandBuffer(vk, *cmdBuffer);
429
430         // Wait for completion
431
432         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
433
434         // Validate the results
435
436         const Allocation& bufferAllocation = buffer.getAllocation();
437         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
438
439         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
440
441         for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
442         {
443                 const int globalOffset = groupNdx * workGroupSize;
444                 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
445                 {
446                         const deUint32 res = bufferPtr[globalOffset + localOffset];
447                         const deUint32 ref = localOffset + 1;
448
449                         if (res != ref)
450                         {
451                                 std::ostringstream msg;
452                                 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
453                                 return tcu::TestStatus::fail(msg.str());
454                         }
455                 }
456         }
457         return tcu::TestStatus::pass("Compute succeeded");
458 }
459
460 class SSBOLocalBarrierTest : public vkt::TestCase
461 {
462 public:
463                                                 SSBOLocalBarrierTest    (tcu::TestContext&      testCtx,
464                                                                                                  const std::string& name,
465                                                                                                  const std::string&     description,
466                                                                                                  const tcu::IVec3&      localSize,
467                                                                                                  const tcu::IVec3&      workSize);
468
469         void                            initPrograms                    (SourceCollections& sourceCollections) const;
470         TestInstance*           createInstance                  (Context&                       context) const;
471
472 private:
473         const tcu::IVec3        m_localSize;
474         const tcu::IVec3        m_workSize;
475 };
476
477 class SSBOLocalBarrierTestInstance : public vkt::TestInstance
478 {
479 public:
480                                                                         SSBOLocalBarrierTestInstance    (Context&                       context,
481                                                                                                                                          const tcu::IVec3&      localSize,
482                                                                                                                                          const tcu::IVec3&      workSize);
483
484         tcu::TestStatus                                 iterate                                                 (void);
485
486 private:
487         const tcu::IVec3                                m_localSize;
488         const tcu::IVec3                                m_workSize;
489 };
490
491 SSBOLocalBarrierTest::SSBOLocalBarrierTest (tcu::TestContext&   testCtx,
492                                                                                         const std::string&      name,
493                                                                                         const std::string&      description,
494                                                                                         const tcu::IVec3&       localSize,
495                                                                                         const tcu::IVec3&       workSize)
496         : TestCase              (testCtx, name, description)
497         , m_localSize   (localSize)
498         , m_workSize    (workSize)
499 {
500 }
501
502 void SSBOLocalBarrierTest::initPrograms (SourceCollections& sourceCollections) const
503 {
504         const int workGroupSize = multiplyComponents(m_localSize);
505         const int workGroupCount = multiplyComponents(m_workSize);
506         const int numValues = workGroupSize * workGroupCount;
507
508         std::ostringstream src;
509         src << "#version 310 es\n"
510                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
511                 << "layout(binding = 0) coherent buffer Output {\n"
512                 << "    uint values[" << numValues << "];\n"
513                 << "} sb_out;\n\n"
514                 << "void main (void) {\n"
515                 << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
516                 << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
517                 << "    uint globalOffs = localSize*globalNdx;\n"
518                 << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
519                 << "\n"
520                 << "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
521                 << "    memoryBarrierBuffer();\n"
522                 << "    barrier();\n"
523                 << "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n"         // += so we read and write
524                 << "    memoryBarrierBuffer();\n"
525                 << "    barrier();\n"
526                 << "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
527                 << "}\n";
528
529         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
530 }
531
532 TestInstance* SSBOLocalBarrierTest::createInstance (Context& context) const
533 {
534         return new SSBOLocalBarrierTestInstance(context, m_localSize, m_workSize);
535 }
536
537 SSBOLocalBarrierTestInstance::SSBOLocalBarrierTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
538         : TestInstance  (context)
539         , m_localSize   (localSize)
540         , m_workSize    (workSize)
541 {
542 }
543
544 tcu::TestStatus SSBOLocalBarrierTestInstance::iterate (void)
545 {
546         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
547         const VkDevice                  device                          = m_context.getDevice();
548         const VkQueue                   queue                           = m_context.getUniversalQueue();
549         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
550         Allocator&                              allocator                       = m_context.getDefaultAllocator();
551
552         const int workGroupSize = multiplyComponents(m_localSize);
553         const int workGroupCount = multiplyComponents(m_workSize);
554
555         // Create a buffer and host-visible memory for it
556
557         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
558         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
559
560         // Create descriptor set
561
562         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
563                 DescriptorSetLayoutBuilder()
564                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
565                 .build(vk, device));
566
567         const Unique<VkDescriptorPool> descriptorPool(
568                 DescriptorPoolBuilder()
569                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
570                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
571
572         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
573
574         const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
575         DescriptorSetUpdateBuilder()
576                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
577                 .update(vk, device);
578
579         // Perform the computation
580
581         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
582         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
583         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
584
585         const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
586         const void* barriers[] = { &computeFinishBarrier };
587
588         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
589         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
590
591         // Start recording commands
592
593         beginCommandBuffer(vk, *cmdBuffer);
594
595         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
596         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
597
598         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
599
600         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriers), barriers);
601
602         endCommandBuffer(vk, *cmdBuffer);
603
604         // Wait for completion
605
606         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
607
608         // Validate the results
609
610         const Allocation& bufferAllocation = buffer.getAllocation();
611         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
612
613         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
614
615         for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
616         {
617                 const int globalOffset = groupNdx * workGroupSize;
618                 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
619                 {
620                         const deUint32  res             = bufferPtr[globalOffset + localOffset];
621                         const int               offs0   = localOffset - 1 < 0 ? ((localOffset + workGroupSize - 1) % workGroupSize) : ((localOffset - 1) % workGroupSize);
622                         const int               offs1   = localOffset - 2 < 0 ? ((localOffset + workGroupSize - 2) % workGroupSize) : ((localOffset - 2) % workGroupSize);
623                         const deUint32  ref             = static_cast<deUint32>(globalOffset + offs0 + offs1);
624
625                         if (res != ref)
626                         {
627                                 std::ostringstream msg;
628                                 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
629                                 return tcu::TestStatus::fail(msg.str());
630                         }
631                 }
632         }
633         return tcu::TestStatus::pass("Compute succeeded");
634 }
635
636 class CopyImageToSSBOTest : public vkt::TestCase
637 {
638 public:
639                                                 CopyImageToSSBOTest             (tcu::TestContext&      testCtx,
640                                                                                                  const std::string&     name,
641                                                                                                  const std::string&     description,
642                                                                                                  const tcu::IVec2&      localSize,
643                                                                                                  const tcu::IVec2&      imageSize);
644
645         void                            initPrograms                    (SourceCollections& sourceCollections) const;
646         TestInstance*           createInstance                  (Context&                       context) const;
647
648 private:
649         const tcu::IVec2        m_localSize;
650         const tcu::IVec2        m_imageSize;
651 };
652
653 class CopyImageToSSBOTestInstance : public vkt::TestInstance
654 {
655 public:
656                                                                         CopyImageToSSBOTestInstance             (Context&                       context,
657                                                                                                                                          const tcu::IVec2&      localSize,
658                                                                                                                                          const tcu::IVec2&      imageSize);
659
660         tcu::TestStatus                                 iterate                                                 (void);
661
662 private:
663         const tcu::IVec2                                m_localSize;
664         const tcu::IVec2                                m_imageSize;
665 };
666
667 CopyImageToSSBOTest::CopyImageToSSBOTest (tcu::TestContext&             testCtx,
668                                                                                   const std::string&    name,
669                                                                                   const std::string&    description,
670                                                                                   const tcu::IVec2&             localSize,
671                                                                                   const tcu::IVec2&             imageSize)
672         : TestCase              (testCtx, name, description)
673         , m_localSize   (localSize)
674         , m_imageSize   (imageSize)
675 {
676         DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
677         DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
678 }
679
680 void CopyImageToSSBOTest::initPrograms (SourceCollections& sourceCollections) const
681 {
682         std::ostringstream src;
683         src << "#version 310 es\n"
684                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
685                 << "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_srcImg;\n"
686                 << "layout(binding = 0) writeonly buffer Output {\n"
687                 << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
688                 << "} sb_out;\n\n"
689                 << "void main (void) {\n"
690                 << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
691                 << "    uint value  = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
692                 << "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
693                 << "}\n";
694
695         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
696 }
697
698 TestInstance* CopyImageToSSBOTest::createInstance (Context& context) const
699 {
700         return new CopyImageToSSBOTestInstance(context, m_localSize, m_imageSize);
701 }
702
703 CopyImageToSSBOTestInstance::CopyImageToSSBOTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
704         : TestInstance  (context)
705         , m_localSize   (localSize)
706         , m_imageSize   (imageSize)
707 {
708 }
709
710 tcu::TestStatus CopyImageToSSBOTestInstance::iterate (void)
711 {
712         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
713         const VkDevice                  device                          = m_context.getDevice();
714         const VkQueue                   queue                           = m_context.getUniversalQueue();
715         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
716         Allocator&                              allocator                       = m_context.getDefaultAllocator();
717
718         // Create an image
719
720         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
721         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
722
723         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
724         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
725
726         // Staging buffer (source data for image)
727
728         const deUint32 imageArea = multiplyComponents(m_imageSize);
729         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
730
731         const Buffer stagingBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
732
733         // Populate the staging buffer with test data
734         {
735                 de::Random rnd(0xab2c7);
736                 const Allocation& stagingBufferAllocation = stagingBuffer.getAllocation();
737                 deUint32* bufferPtr = static_cast<deUint32*>(stagingBufferAllocation.getHostPtr());
738                 for (deUint32 i = 0; i < imageArea; ++i)
739                         *bufferPtr++ = rnd.getUint32();
740
741                 flushMappedMemoryRange(vk, device, stagingBufferAllocation.getMemory(), stagingBufferAllocation.getOffset(), bufferSizeBytes);
742         }
743
744         // Create a buffer to store shader output
745
746         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
747
748         // Create descriptor set
749
750         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
751                 DescriptorSetLayoutBuilder()
752                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
753                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
754                 .build(vk, device));
755
756         const Unique<VkDescriptorPool> descriptorPool(
757                 DescriptorPoolBuilder()
758                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
759                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
760                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
761
762         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
763
764         // Set the bindings
765
766         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
767         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
768
769         DescriptorSetUpdateBuilder()
770                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
771                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
772                 .update(vk, device);
773
774         // Perform the computation
775         {
776                 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
777                 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
778                 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
779
780                 const VkBufferMemoryBarrier stagingBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *stagingBuffer, 0ull, bufferSizeBytes);
781
782                 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
783                         0u, 0u,
784                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
785                         *image, subresourceRange);
786
787                 const VkImageMemoryBarrier imagePostCopyBarrier = makeImageMemoryBarrier(
788                         VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
789                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
790                         *image, subresourceRange);
791
792                 const void* preCopyBarriers[] = { &stagingBufferPostHostWriteBarrier, &imagePreCopyBarrier };
793                 const void* postCopyBarriers[] = { &imagePostCopyBarrier };
794
795                 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
796                 const void* postComputeBarriers[] = { &computeFinishBarrier };
797
798                 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
799                 const tcu::IVec2 workSize = m_imageSize / m_localSize;
800
801                 // Prepare the command buffer
802
803                 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
804                 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
805
806                 // Start recording commands
807
808                 beginCommandBuffer(vk, *cmdBuffer);
809
810                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
811                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
812
813                 vk.cmdPipelineBarrier(*cmdBuffer, 0u, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preCopyBarriers), preCopyBarriers);
814                 vk.cmdCopyBufferToImage(*cmdBuffer, *stagingBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &copyParams);
815                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postCopyBarriers), postCopyBarriers);
816
817                 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
818                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postComputeBarriers), postComputeBarriers);
819
820                 endCommandBuffer(vk, *cmdBuffer);
821
822                 // Wait for completion
823
824                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
825         }
826
827         // Validate the results
828
829         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
830         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
831
832         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
833         const deUint32* refBufferPtr = static_cast<deUint32*>(stagingBuffer.getAllocation().getHostPtr());
834
835         for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
836         {
837                 const deUint32 res = *(bufferPtr + ndx);
838                 const deUint32 ref = *(refBufferPtr + ndx);
839
840                 if (res != ref)
841                 {
842                         std::ostringstream msg;
843                         msg << "Comparison failed for Output.values[" << ndx << "]";
844                         return tcu::TestStatus::fail(msg.str());
845                 }
846         }
847         return tcu::TestStatus::pass("Compute succeeded");
848 }
849
850 class CopySSBOToImageTest : public vkt::TestCase
851 {
852 public:
853                                                 CopySSBOToImageTest     (tcu::TestContext&      testCtx,
854                                                                                          const std::string&     name,
855                                                                                          const std::string&     description,
856                                                                                          const tcu::IVec2&      localSize,
857                                                                                          const tcu::IVec2&      imageSize);
858
859         void                            initPrograms            (SourceCollections& sourceCollections) const;
860         TestInstance*           createInstance          (Context&                       context) const;
861
862 private:
863         const tcu::IVec2        m_localSize;
864         const tcu::IVec2        m_imageSize;
865 };
866
867 class CopySSBOToImageTestInstance : public vkt::TestInstance
868 {
869 public:
870                                                                         CopySSBOToImageTestInstance     (Context&                       context,
871                                                                                                                                  const tcu::IVec2&      localSize,
872                                                                                                                                  const tcu::IVec2&      imageSize);
873
874         tcu::TestStatus                                 iterate                                         (void);
875
876 private:
877         const tcu::IVec2                                m_localSize;
878         const tcu::IVec2                                m_imageSize;
879 };
880
881 CopySSBOToImageTest::CopySSBOToImageTest (tcu::TestContext&             testCtx,
882                                                                                   const std::string&    name,
883                                                                                   const std::string&    description,
884                                                                                   const tcu::IVec2&             localSize,
885                                                                                   const tcu::IVec2&             imageSize)
886         : TestCase              (testCtx, name, description)
887         , m_localSize   (localSize)
888         , m_imageSize   (imageSize)
889 {
890         DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
891         DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
892 }
893
894 void CopySSBOToImageTest::initPrograms (SourceCollections& sourceCollections) const
895 {
896         std::ostringstream src;
897         src << "#version 310 es\n"
898                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
899                 << "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_dstImg;\n"
900                 << "layout(binding = 0) readonly buffer Input {\n"
901                 << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
902                 << "} sb_in;\n\n"
903                 << "void main (void) {\n"
904                 << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
905                 << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
906                 << "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
907                 << "}\n";
908
909         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
910 }
911
912 TestInstance* CopySSBOToImageTest::createInstance (Context& context) const
913 {
914         return new CopySSBOToImageTestInstance(context, m_localSize, m_imageSize);
915 }
916
917 CopySSBOToImageTestInstance::CopySSBOToImageTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
918         : TestInstance  (context)
919         , m_localSize   (localSize)
920         , m_imageSize   (imageSize)
921 {
922 }
923
924 tcu::TestStatus CopySSBOToImageTestInstance::iterate (void)
925 {
926         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
927         const VkDevice                  device                          = m_context.getDevice();
928         const VkQueue                   queue                           = m_context.getUniversalQueue();
929         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
930         Allocator&                              allocator                       = m_context.getDefaultAllocator();
931
932         // Create an image
933
934         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
935         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
936
937         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
938         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
939
940         // Create an input buffer (data to be read in the shader)
941
942         const deUint32 imageArea = multiplyComponents(m_imageSize);
943         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
944
945         const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER), MemoryRequirement::HostVisible);
946
947         // Populate the buffer with test data
948         {
949                 de::Random rnd(0x77238ac2);
950                 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
951                 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
952                 for (deUint32 i = 0; i < imageArea; ++i)
953                         *bufferPtr++ = rnd.getUint32();
954
955                 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
956         }
957
958         // Create a buffer to store shader output (copied from image data)
959
960         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
961
962         // Create descriptor set
963
964         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
965                 DescriptorSetLayoutBuilder()
966                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
967                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
968                 .build(vk, device));
969
970         const Unique<VkDescriptorPool> descriptorPool(
971                 DescriptorPoolBuilder()
972                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
973                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
974                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
975
976         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
977
978         // Set the bindings
979
980         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
981         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
982
983         DescriptorSetUpdateBuilder()
984                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
985                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
986                 .update(vk, device);
987
988         // Perform the computation
989         {
990                 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
991                 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
992                 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
993
994                 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
995
996                 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
997                         0u, 0u,
998                         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
999                         *image, subresourceRange);
1000
1001                 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
1002                         VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
1003                         VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1004                         *image, subresourceRange);
1005
1006                 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1007
1008                 const void* preComputeBarriers[] = { &inputBufferPostHostWriteBarrier, &imageLayoutBarrier };
1009                 const void* preCopyBarriers[] = { &imagePreCopyBarrier };
1010                 const void* postCopyBarriers[] = { &outputBufferPostCopyBarrier };
1011
1012                 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
1013                 const tcu::IVec2 workSize = m_imageSize / m_localSize;
1014
1015                 // Prepare the command buffer
1016
1017                 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1018                 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1019
1020                 // Start recording commands
1021
1022                 beginCommandBuffer(vk, *cmdBuffer);
1023
1024                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1025                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1026
1027                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preComputeBarriers), preComputeBarriers);
1028                 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
1029
1030                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preCopyBarriers), preCopyBarriers);
1031                 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &copyParams);
1032                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postCopyBarriers), postCopyBarriers);
1033
1034                 endCommandBuffer(vk, *cmdBuffer);
1035
1036                 // Wait for completion
1037
1038                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1039         }
1040
1041         // Validate the results
1042
1043         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1044         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1045
1046         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1047         const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
1048
1049         for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
1050         {
1051                 const deUint32 res = *(bufferPtr + ndx);
1052                 const deUint32 ref = *(refBufferPtr + ndx);
1053
1054                 if (res != ref)
1055                 {
1056                         std::ostringstream msg;
1057                         msg << "Comparison failed for pixel " << ndx;
1058                         return tcu::TestStatus::fail(msg.str());
1059                 }
1060         }
1061         return tcu::TestStatus::pass("Compute succeeded");
1062 }
1063
1064 class BufferToBufferInvertTest : public vkt::TestCase
1065 {
1066 public:
1067         void                                                            initPrograms                            (SourceCollections&     sourceCollections) const;
1068         TestInstance*                                           createInstance                          (Context&                       context) const;
1069
1070         static BufferToBufferInvertTest*        UBOToSSBOInvertCase                     (tcu::TestContext&      testCtx,
1071                                                                                                                                          const std::string& name,
1072                                                                                                                                          const std::string& description,
1073                                                                                                                                          const deUint32         numValues,
1074                                                                                                                                          const tcu::IVec3&      localSize,
1075                                                                                                                                          const tcu::IVec3&      workSize);
1076
1077         static BufferToBufferInvertTest*        CopyInvertSSBOCase                      (tcu::TestContext&      testCtx,
1078                                                                                                                                          const std::string& name,
1079                                                                                                                                          const std::string& description,
1080                                                                                                                                          const deUint32         numValues,
1081                                                                                                                                          const tcu::IVec3&      localSize,
1082                                                                                                                                          const tcu::IVec3&      workSize);
1083
1084 private:
1085                                                                                 BufferToBufferInvertTest        (tcu::TestContext&      testCtx,
1086                                                                                                                                          const std::string& name,
1087                                                                                                                                          const std::string& description,
1088                                                                                                                                          const deUint32         numValues,
1089                                                                                                                                          const tcu::IVec3&      localSize,
1090                                                                                                                                          const tcu::IVec3&      workSize,
1091                                                                                                                                          const BufferType       bufferType);
1092
1093         const BufferType                                        m_bufferType;
1094         const deUint32                                          m_numValues;
1095         const tcu::IVec3                                        m_localSize;
1096         const tcu::IVec3                                        m_workSize;
1097 };
1098
1099 class BufferToBufferInvertTestInstance : public vkt::TestInstance
1100 {
1101 public:
1102                                                                         BufferToBufferInvertTestInstance        (Context&                       context,
1103                                                                                                                                                  const deUint32         numValues,
1104                                                                                                                                                  const tcu::IVec3&      localSize,
1105                                                                                                                                                  const tcu::IVec3&      workSize,
1106                                                                                                                                                  const BufferType       bufferType);
1107
1108         tcu::TestStatus                                 iterate                                                         (void);
1109
1110 private:
1111         const BufferType                                m_bufferType;
1112         const deUint32                                  m_numValues;
1113         const tcu::IVec3                                m_localSize;
1114         const tcu::IVec3                                m_workSize;
1115 };
1116
1117 BufferToBufferInvertTest::BufferToBufferInvertTest (tcu::TestContext&   testCtx,
1118                                                                                                         const std::string&      name,
1119                                                                                                         const std::string&      description,
1120                                                                                                         const deUint32          numValues,
1121                                                                                                         const tcu::IVec3&       localSize,
1122                                                                                                         const tcu::IVec3&       workSize,
1123                                                                                                         const BufferType        bufferType)
1124         : TestCase              (testCtx, name, description)
1125         , m_bufferType  (bufferType)
1126         , m_numValues   (numValues)
1127         , m_localSize   (localSize)
1128         , m_workSize    (workSize)
1129 {
1130         DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1131         DE_ASSERT(m_bufferType == BUFFER_TYPE_UNIFORM || m_bufferType == BUFFER_TYPE_SSBO);
1132 }
1133
1134 BufferToBufferInvertTest* BufferToBufferInvertTest::UBOToSSBOInvertCase (tcu::TestContext&      testCtx,
1135                                                                                                                                                  const std::string&     name,
1136                                                                                                                                                  const std::string&     description,
1137                                                                                                                                                  const deUint32         numValues,
1138                                                                                                                                                  const tcu::IVec3&      localSize,
1139                                                                                                                                                  const tcu::IVec3&      workSize)
1140 {
1141         return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_UNIFORM);
1142 }
1143
1144 BufferToBufferInvertTest* BufferToBufferInvertTest::CopyInvertSSBOCase (tcu::TestContext&       testCtx,
1145                                                                                                                                                 const std::string&      name,
1146                                                                                                                                                 const std::string&      description,
1147                                                                                                                                                 const deUint32          numValues,
1148                                                                                                                                                 const tcu::IVec3&       localSize,
1149                                                                                                                                                 const tcu::IVec3&       workSize)
1150 {
1151         return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_SSBO);
1152 }
1153
1154 void BufferToBufferInvertTest::initPrograms (SourceCollections& sourceCollections) const
1155 {
1156         std::ostringstream src;
1157         if (m_bufferType == BUFFER_TYPE_UNIFORM)
1158         {
1159                 src << "#version 310 es\n"
1160                         << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1161                         << "layout(binding = 0) readonly uniform Input {\n"
1162                         << "    uint values[" << m_numValues << "];\n"
1163                         << "} ub_in;\n"
1164                         << "layout(binding = 1) writeonly buffer Output {\n"
1165                         << "    uint values[" << m_numValues << "];\n"
1166                         << "} sb_out;\n"
1167                         << "void main (void) {\n"
1168                         << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1169                         << "    uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
1170                         << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1171                         << "    uint offset          = numValuesPerInv*groupNdx;\n"
1172                         << "\n"
1173                         << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1174                         << "        sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
1175                         << "}\n";
1176         }
1177         else if (m_bufferType == BUFFER_TYPE_SSBO)
1178         {
1179                 src << "#version 310 es\n"
1180                         << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1181                         << "layout(binding = 0) readonly buffer Input {\n"
1182                         << "    uint values[" << m_numValues << "];\n"
1183                         << "} sb_in;\n"
1184                         << "layout (binding = 1) writeonly buffer Output {\n"
1185                         << "    uint values[" << m_numValues << "];\n"
1186                         << "} sb_out;\n"
1187                         << "void main (void) {\n"
1188                         << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1189                         << "    uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
1190                         << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1191                         << "    uint offset          = numValuesPerInv*groupNdx;\n"
1192                         << "\n"
1193                         << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1194                         << "        sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
1195                         << "}\n";
1196         }
1197
1198         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1199 }
1200
1201 TestInstance* BufferToBufferInvertTest::createInstance (Context& context) const
1202 {
1203         return new BufferToBufferInvertTestInstance(context, m_numValues, m_localSize, m_workSize, m_bufferType);
1204 }
1205
1206 BufferToBufferInvertTestInstance::BufferToBufferInvertTestInstance (Context&                    context,
1207                                                                                                                                         const deUint32          numValues,
1208                                                                                                                                         const tcu::IVec3&       localSize,
1209                                                                                                                                         const tcu::IVec3&       workSize,
1210                                                                                                                                         const BufferType        bufferType)
1211         : TestInstance  (context)
1212         , m_bufferType  (bufferType)
1213         , m_numValues   (numValues)
1214         , m_localSize   (localSize)
1215         , m_workSize    (workSize)
1216 {
1217 }
1218
1219 tcu::TestStatus BufferToBufferInvertTestInstance::iterate (void)
1220 {
1221         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1222         const VkDevice                  device                          = m_context.getDevice();
1223         const VkQueue                   queue                           = m_context.getUniversalQueue();
1224         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1225         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1226
1227         // Customize the test based on buffer type
1228
1229         const VkBufferUsageFlags inputBufferUsageFlags          = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1230         const VkDescriptorType inputBufferDescriptorType        = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
1231         const deUint32 randomSeed                                                       = (m_bufferType == BUFFER_TYPE_UNIFORM ? 0x111223f : 0x124fef);
1232
1233         // Create an input buffer
1234
1235         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1236         const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, inputBufferUsageFlags), MemoryRequirement::HostVisible);
1237
1238         // Fill the input buffer with data
1239         {
1240                 de::Random rnd(randomSeed);
1241                 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
1242                 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
1243                 for (deUint32 i = 0; i < m_numValues; ++i)
1244                         *bufferPtr++ = rnd.getUint32();
1245
1246                 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
1247         }
1248
1249         // Create an output buffer
1250
1251         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1252
1253         // Create descriptor set
1254
1255         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1256                 DescriptorSetLayoutBuilder()
1257                 .addSingleBinding(inputBufferDescriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1258                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1259                 .build(vk, device));
1260
1261         const Unique<VkDescriptorPool> descriptorPool(
1262                 DescriptorPoolBuilder()
1263                 .addType(inputBufferDescriptorType)
1264                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1265                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1266
1267         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1268
1269         const VkDescriptorBufferInfo inputBufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
1270         const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
1271         DescriptorSetUpdateBuilder()
1272                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), inputBufferDescriptorType, &inputBufferDescriptorInfo)
1273                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1274                 .update(vk, device);
1275
1276         // Perform the computation
1277
1278         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1279         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1280         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1281
1282         const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
1283         const void* preComputeBarriers[] = { &hostWriteBarrier };
1284
1285         const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1286         const void* postComputeBarriers[] = { &shaderWriteBarrier };
1287
1288         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1289         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1290
1291         // Start recording commands
1292
1293         beginCommandBuffer(vk, *cmdBuffer);
1294
1295         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1296         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1297
1298         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preComputeBarriers), preComputeBarriers);
1299         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1300         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postComputeBarriers), postComputeBarriers);
1301
1302         endCommandBuffer(vk, *cmdBuffer);
1303
1304         // Wait for completion
1305
1306         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1307
1308         // Validate the results
1309
1310         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1311         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1312
1313         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1314         const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
1315
1316         for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1317         {
1318                 const deUint32 res = bufferPtr[ndx];
1319                 const deUint32 ref = ~refBufferPtr[ndx];
1320
1321                 if (res != ref)
1322                 {
1323                         std::ostringstream msg;
1324                         msg << "Comparison failed for Output.values[" << ndx << "]";
1325                         return tcu::TestStatus::fail(msg.str());
1326                 }
1327         }
1328         return tcu::TestStatus::pass("Compute succeeded");
1329 }
1330
1331 class InvertSSBOInPlaceTest : public vkt::TestCase
1332 {
1333 public:
1334                                                 InvertSSBOInPlaceTest   (tcu::TestContext&      testCtx,
1335                                                                                                  const std::string&     name,
1336                                                                                                  const std::string&     description,
1337                                                                                                  const deUint32         numValues,
1338                                                                                                  const bool                     sized,
1339                                                                                                  const tcu::IVec3&      localSize,
1340                                                                                                  const tcu::IVec3&      workSize);
1341
1342
1343         void                            initPrograms                    (SourceCollections& sourceCollections) const;
1344         TestInstance*           createInstance                  (Context&                       context) const;
1345
1346 private:
1347         const deUint32          m_numValues;
1348         const bool                      m_sized;
1349         const tcu::IVec3        m_localSize;
1350         const tcu::IVec3        m_workSize;
1351 };
1352
1353 class InvertSSBOInPlaceTestInstance : public vkt::TestInstance
1354 {
1355 public:
1356                                                                         InvertSSBOInPlaceTestInstance   (Context&                       context,
1357                                                                                                                                          const deUint32         numValues,
1358                                                                                                                                          const tcu::IVec3&      localSize,
1359                                                                                                                                          const tcu::IVec3&      workSize);
1360
1361         tcu::TestStatus                                 iterate                                                 (void);
1362
1363 private:
1364         const deUint32                                  m_numValues;
1365         const tcu::IVec3                                m_localSize;
1366         const tcu::IVec3                                m_workSize;
1367 };
1368
1369 InvertSSBOInPlaceTest::InvertSSBOInPlaceTest (tcu::TestContext&         testCtx,
1370                                                                                           const std::string&    name,
1371                                                                                           const std::string&    description,
1372                                                                                           const deUint32                numValues,
1373                                                                                           const bool                    sized,
1374                                                                                           const tcu::IVec3&             localSize,
1375                                                                                           const tcu::IVec3&             workSize)
1376         : TestCase              (testCtx, name, description)
1377         , m_numValues   (numValues)
1378         , m_sized               (sized)
1379         , m_localSize   (localSize)
1380         , m_workSize    (workSize)
1381 {
1382         DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1383 }
1384
1385 void InvertSSBOInPlaceTest::initPrograms (SourceCollections& sourceCollections) const
1386 {
1387         std::ostringstream src;
1388         src << "#version 310 es\n"
1389                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1390                 << "layout(binding = 0) buffer InOut {\n"
1391                 << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1392                 << "} sb_inout;\n"
1393                 << "void main (void) {\n"
1394                 << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1395                 << "    uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
1396                 << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1397                 << "    uint offset          = numValuesPerInv*groupNdx;\n"
1398                 << "\n"
1399                 << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1400                 << "        sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
1401                 << "}\n";
1402
1403         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1404 }
1405
1406 TestInstance* InvertSSBOInPlaceTest::createInstance (Context& context) const
1407 {
1408         return new InvertSSBOInPlaceTestInstance(context, m_numValues, m_localSize, m_workSize);
1409 }
1410
1411 InvertSSBOInPlaceTestInstance::InvertSSBOInPlaceTestInstance (Context&                  context,
1412                                                                                                                           const deUint32        numValues,
1413                                                                                                                           const tcu::IVec3&     localSize,
1414                                                                                                                           const tcu::IVec3&     workSize)
1415         : TestInstance  (context)
1416         , m_numValues   (numValues)
1417         , m_localSize   (localSize)
1418         , m_workSize    (workSize)
1419 {
1420 }
1421
1422 tcu::TestStatus InvertSSBOInPlaceTestInstance::iterate (void)
1423 {
1424         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1425         const VkDevice                  device                          = m_context.getDevice();
1426         const VkQueue                   queue                           = m_context.getUniversalQueue();
1427         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1428         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1429
1430         // Create an input/output buffer
1431
1432         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1433         const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1434
1435         // Fill the buffer with data
1436
1437         typedef std::vector<deUint32> data_vector_t;
1438         data_vector_t inputData(m_numValues);
1439
1440         {
1441                 de::Random rnd(0x82ce7f);
1442                 const Allocation& bufferAllocation = buffer.getAllocation();
1443                 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1444                 for (deUint32 i = 0; i < m_numValues; ++i)
1445                         inputData[i] = *bufferPtr++ = rnd.getUint32();
1446
1447                 flushMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1448         }
1449
1450         // Create descriptor set
1451
1452         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1453                 DescriptorSetLayoutBuilder()
1454                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1455                 .build(vk, device));
1456
1457         const Unique<VkDescriptorPool> descriptorPool(
1458                 DescriptorPoolBuilder()
1459                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1460                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1461
1462         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1463
1464         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
1465         DescriptorSetUpdateBuilder()
1466                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
1467                 .update(vk, device);
1468
1469         // Perform the computation
1470
1471         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1472         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1473         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1474
1475         const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1476         const void* preComputeBarriers[] = { &hostWriteBarrier };
1477
1478         const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1479         const void* postComputeBarriers[] = { &shaderWriteBarrier };
1480
1481         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1482         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1483
1484         // Start recording commands
1485
1486         beginCommandBuffer(vk, *cmdBuffer);
1487
1488         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1489         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1490
1491         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preComputeBarriers), preComputeBarriers);
1492         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1493         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postComputeBarriers), postComputeBarriers);
1494
1495         endCommandBuffer(vk, *cmdBuffer);
1496
1497         // Wait for completion
1498
1499         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1500
1501         // Validate the results
1502
1503         const Allocation& bufferAllocation = buffer.getAllocation();
1504         invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1505
1506         const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1507
1508         for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1509         {
1510                 const deUint32 res = bufferPtr[ndx];
1511                 const deUint32 ref = ~inputData[ndx];
1512
1513                 if (res != ref)
1514                 {
1515                         std::ostringstream msg;
1516                         msg << "Comparison failed for InOut.values[" << ndx << "]";
1517                         return tcu::TestStatus::fail(msg.str());
1518                 }
1519         }
1520         return tcu::TestStatus::pass("Compute succeeded");
1521 }
1522
1523 class WriteToMultipleSSBOTest : public vkt::TestCase
1524 {
1525 public:
1526                                                 WriteToMultipleSSBOTest (tcu::TestContext&      testCtx,
1527                                                                                                  const std::string&     name,
1528                                                                                                  const std::string&     description,
1529                                                                                                  const deUint32         numValues,
1530                                                                                                  const bool                     sized,
1531                                                                                                  const tcu::IVec3&      localSize,
1532                                                                                                  const tcu::IVec3&      workSize);
1533
1534         void                            initPrograms                    (SourceCollections& sourceCollections) const;
1535         TestInstance*           createInstance                  (Context&                       context) const;
1536
1537 private:
1538         const deUint32          m_numValues;
1539         const bool                      m_sized;
1540         const tcu::IVec3        m_localSize;
1541         const tcu::IVec3        m_workSize;
1542 };
1543
1544 class WriteToMultipleSSBOTestInstance : public vkt::TestInstance
1545 {
1546 public:
1547                                                                         WriteToMultipleSSBOTestInstance (Context&                       context,
1548                                                                                                                                          const deUint32         numValues,
1549                                                                                                                                          const tcu::IVec3&      localSize,
1550                                                                                                                                          const tcu::IVec3&      workSize);
1551
1552         tcu::TestStatus                                 iterate                                                 (void);
1553
1554 private:
1555         const deUint32                                  m_numValues;
1556         const tcu::IVec3                                m_localSize;
1557         const tcu::IVec3                                m_workSize;
1558 };
1559
1560 WriteToMultipleSSBOTest::WriteToMultipleSSBOTest (tcu::TestContext&             testCtx,
1561                                                                                                   const std::string&    name,
1562                                                                                                   const std::string&    description,
1563                                                                                                   const deUint32                numValues,
1564                                                                                                   const bool                    sized,
1565                                                                                                   const tcu::IVec3&             localSize,
1566                                                                                                   const tcu::IVec3&             workSize)
1567         : TestCase              (testCtx, name, description)
1568         , m_numValues   (numValues)
1569         , m_sized               (sized)
1570         , m_localSize   (localSize)
1571         , m_workSize    (workSize)
1572 {
1573         DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1574 }
1575
1576 void WriteToMultipleSSBOTest::initPrograms (SourceCollections& sourceCollections) const
1577 {
1578         std::ostringstream src;
1579         src << "#version 310 es\n"
1580                 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1581                 << "layout(binding = 0) writeonly buffer Out0 {\n"
1582                 << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1583                 << "} sb_out0;\n"
1584                 << "layout(binding = 1) writeonly buffer Out1 {\n"
1585                 << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1586                 << "} sb_out1;\n"
1587                 << "void main (void) {\n"
1588                 << "    uvec3 size      = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1589                 << "    uint groupNdx   = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1590                 << "\n"
1591                 << "    {\n"
1592                 << "        uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
1593                 << "        uint offset          = numValuesPerInv*groupNdx;\n"
1594                 << "\n"
1595                 << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1596                 << "            sb_out0.values[offset + ndx] = offset + ndx;\n"
1597                 << "    }\n"
1598                 << "    {\n"
1599                 << "        uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
1600                 << "        uint offset          = numValuesPerInv*groupNdx;\n"
1601                 << "\n"
1602                 << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1603                 << "            sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
1604                 << "    }\n"
1605                 << "}\n";
1606
1607         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1608 }
1609
1610 TestInstance* WriteToMultipleSSBOTest::createInstance (Context& context) const
1611 {
1612         return new WriteToMultipleSSBOTestInstance(context, m_numValues, m_localSize, m_workSize);
1613 }
1614
1615 WriteToMultipleSSBOTestInstance::WriteToMultipleSSBOTestInstance (Context&                      context,
1616                                                                                                                                   const deUint32        numValues,
1617                                                                                                                                   const tcu::IVec3&     localSize,
1618                                                                                                                                   const tcu::IVec3&     workSize)
1619         : TestInstance  (context)
1620         , m_numValues   (numValues)
1621         , m_localSize   (localSize)
1622         , m_workSize    (workSize)
1623 {
1624 }
1625
1626 tcu::TestStatus WriteToMultipleSSBOTestInstance::iterate (void)
1627 {
1628         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1629         const VkDevice                  device                          = m_context.getDevice();
1630         const VkQueue                   queue                           = m_context.getUniversalQueue();
1631         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1632         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1633
1634         // Create two output buffers
1635
1636         const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1637         const Buffer buffer0(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1638         const Buffer buffer1(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1639
1640         // Create descriptor set
1641
1642         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1643                 DescriptorSetLayoutBuilder()
1644                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1645                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1646                 .build(vk, device));
1647
1648         const Unique<VkDescriptorPool> descriptorPool(
1649                 DescriptorPoolBuilder()
1650                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1651                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1652
1653         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1654
1655         const VkDescriptorBufferInfo buffer0DescriptorInfo = makeDescriptorBufferInfo(*buffer0, 0ull, bufferSizeBytes);
1656         const VkDescriptorBufferInfo buffer1DescriptorInfo = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
1657         DescriptorSetUpdateBuilder()
1658                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer0DescriptorInfo)
1659                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer1DescriptorInfo)
1660                 .update(vk, device);
1661
1662         // Perform the computation
1663
1664         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1665         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1666         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1667
1668         const VkBufferMemoryBarrier shaderWriteBarrier0 = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer0, 0ull, bufferSizeBytes);
1669         const VkBufferMemoryBarrier shaderWriteBarrier1 = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes);
1670         const void* postComputeBarriers[] = { &shaderWriteBarrier0, &shaderWriteBarrier1 };
1671
1672         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1673         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1674
1675         // Start recording commands
1676
1677         beginCommandBuffer(vk, *cmdBuffer);
1678
1679         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1680         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1681
1682         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1683         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postComputeBarriers), postComputeBarriers);
1684
1685         endCommandBuffer(vk, *cmdBuffer);
1686
1687         // Wait for completion
1688
1689         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1690
1691         // Validate the results
1692         {
1693                 const Allocation& buffer0Allocation = buffer0.getAllocation();
1694                 invalidateMappedMemoryRange(vk, device, buffer0Allocation.getMemory(), buffer0Allocation.getOffset(), bufferSizeBytes);
1695                 const deUint32* buffer0Ptr = static_cast<deUint32*>(buffer0Allocation.getHostPtr());
1696
1697                 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1698                 {
1699                         const deUint32 res = buffer0Ptr[ndx];
1700                         const deUint32 ref = ndx;
1701
1702                         if (res != ref)
1703                         {
1704                                 std::ostringstream msg;
1705                                 msg << "Comparison failed for Out0.values[" << ndx << "] res=" << res << " ref=" << ref;
1706                                 return tcu::TestStatus::fail(msg.str());
1707                         }
1708                 }
1709         }
1710         {
1711                 const Allocation& buffer1Allocation = buffer1.getAllocation();
1712                 invalidateMappedMemoryRange(vk, device, buffer1Allocation.getMemory(), buffer1Allocation.getOffset(), bufferSizeBytes);
1713                 const deUint32* buffer1Ptr = static_cast<deUint32*>(buffer1Allocation.getHostPtr());
1714
1715                 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1716                 {
1717                         const deUint32 res = buffer1Ptr[ndx];
1718                         const deUint32 ref = m_numValues - ndx;
1719
1720                         if (res != ref)
1721                         {
1722                                 std::ostringstream msg;
1723                                 msg << "Comparison failed for Out1.values[" << ndx << "] res=" << res << " ref=" << ref;
1724                                 return tcu::TestStatus::fail(msg.str());
1725                         }
1726                 }
1727         }
1728         return tcu::TestStatus::pass("Compute succeeded");
1729 }
1730
1731 class SSBOBarrierTest : public vkt::TestCase
1732 {
1733 public:
1734                                                 SSBOBarrierTest         (tcu::TestContext&      testCtx,
1735                                                                                          const std::string&     name,
1736                                                                                          const std::string&     description,
1737                                                                                          const tcu::IVec3&      workSize);
1738
1739         void                            initPrograms            (SourceCollections& sourceCollections) const;
1740         TestInstance*           createInstance          (Context&                       context) const;
1741
1742 private:
1743         const tcu::IVec3        m_workSize;
1744 };
1745
1746 class SSBOBarrierTestInstance : public vkt::TestInstance
1747 {
1748 public:
1749                                                                         SSBOBarrierTestInstance         (Context&                       context,
1750                                                                                                                                  const tcu::IVec3&      workSize);
1751
1752         tcu::TestStatus                                 iterate                                         (void);
1753
1754 private:
1755         const tcu::IVec3                                m_workSize;
1756 };
1757
1758 SSBOBarrierTest::SSBOBarrierTest (tcu::TestContext&             testCtx,
1759                                                                   const std::string&    name,
1760                                                                   const std::string&    description,
1761                                                                   const tcu::IVec3&             workSize)
1762         : TestCase              (testCtx, name, description)
1763         , m_workSize    (workSize)
1764 {
1765 }
1766
1767 void SSBOBarrierTest::initPrograms (SourceCollections& sourceCollections) const
1768 {
1769         sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
1770                 "#version 310 es\n"
1771                 "layout (local_size_x = 1) in;\n"
1772                 "layout(binding = 2) readonly uniform Constants {\n"
1773                 "    uint u_baseVal;\n"
1774                 "};\n"
1775                 "layout(binding = 1) writeonly buffer Output {\n"
1776                 "    uint values[];\n"
1777                 "};\n"
1778                 "void main (void) {\n"
1779                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1780                 "    values[offset] = u_baseVal + offset;\n"
1781                 "}\n");
1782
1783         sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
1784                 "#version 310 es\n"
1785                 "layout (local_size_x = 1) in;\n"
1786                 "layout(binding = 1) readonly buffer Input {\n"
1787                 "    uint values[];\n"
1788                 "};\n"
1789                 "layout(binding = 0) coherent buffer Output {\n"
1790                 "    uint sum;\n"
1791                 "};\n"
1792                 "void main (void) {\n"
1793                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1794                 "    uint value  = values[offset];\n"
1795                 "    atomicAdd(sum, value);\n"
1796                 "}\n");
1797 }
1798
1799 TestInstance* SSBOBarrierTest::createInstance (Context& context) const
1800 {
1801         return new SSBOBarrierTestInstance(context, m_workSize);
1802 }
1803
1804 SSBOBarrierTestInstance::SSBOBarrierTestInstance (Context& context, const tcu::IVec3& workSize)
1805         : TestInstance  (context)
1806         , m_workSize    (workSize)
1807 {
1808 }
1809
1810 tcu::TestStatus SSBOBarrierTestInstance::iterate (void)
1811 {
1812         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1813         const VkDevice                  device                          = m_context.getDevice();
1814         const VkQueue                   queue                           = m_context.getUniversalQueue();
1815         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1816         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1817
1818         // Create a work buffer used by both shaders
1819
1820         const int workGroupCount = multiplyComponents(m_workSize);
1821         const VkDeviceSize workBufferSizeBytes = sizeof(deUint32) * workGroupCount;
1822         const Buffer workBuffer(vk, device, allocator, makeBufferCreateInfo(workBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::Any);
1823
1824         // Create an output buffer
1825
1826         const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
1827         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1828
1829         // Create a uniform buffer (to pass uniform constants)
1830
1831         const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
1832         const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
1833
1834         // Set the constants in the uniform buffer
1835
1836         const deUint32  baseValue = 127;
1837         {
1838                 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
1839                 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
1840                 uniformBufferPtr[0] = baseValue;
1841
1842                 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
1843         }
1844
1845         // Create descriptor set
1846
1847         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1848                 DescriptorSetLayoutBuilder()
1849                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1850                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1851                 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1852                 .build(vk, device));
1853
1854         const Unique<VkDescriptorPool> descriptorPool(
1855                 DescriptorPoolBuilder()
1856                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1857                 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1858                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1859
1860         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1861
1862         const VkDescriptorBufferInfo workBufferDescriptorInfo = makeDescriptorBufferInfo(*workBuffer, 0ull, workBufferSizeBytes);
1863         const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
1864         const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
1865         DescriptorSetUpdateBuilder()
1866                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1867                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &workBufferDescriptorInfo)
1868                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
1869                 .update(vk, device);
1870
1871         // Perform the computation
1872
1873         const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
1874         const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
1875
1876         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1877         const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
1878         const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
1879
1880         const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
1881         const void* barriersBeforeCompute[] = { &writeUniformConstantsBarrier };
1882
1883         const VkBufferMemoryBarrier betweenShadersBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *workBuffer, 0ull, workBufferSizeBytes);
1884         const void* barriersAfterFirstShader[] = { &betweenShadersBarrier };
1885
1886         const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
1887         const void* barriersAfterCompute[] = { &afterComputeBarrier };
1888
1889         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1890         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1891
1892         // Start recording commands
1893
1894         beginCommandBuffer(vk, *cmdBuffer);
1895
1896         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
1897         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1898
1899         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriersBeforeCompute), barriersBeforeCompute);
1900
1901         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1902         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriersAfterFirstShader), barriersAfterFirstShader);
1903
1904         // Switch to the second shader program
1905         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
1906
1907         vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1908         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriersAfterCompute), barriersAfterCompute);
1909
1910         endCommandBuffer(vk, *cmdBuffer);
1911
1912         // Wait for completion
1913
1914         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1915
1916         // Validate the results
1917
1918         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1919         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1920
1921         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1922         const deUint32  res = *bufferPtr;
1923         deUint32                ref = 0;
1924
1925         for (int ndx = 0; ndx < workGroupCount; ++ndx)
1926                 ref += baseValue + ndx;
1927
1928         if (res != ref)
1929         {
1930                 std::ostringstream msg;
1931                 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
1932                 return tcu::TestStatus::fail(msg.str());
1933         }
1934         return tcu::TestStatus::pass("Compute succeeded");
1935 }
1936
1937 class ImageAtomicOpTest : public vkt::TestCase
1938 {
1939 public:
1940                                                 ImageAtomicOpTest               (tcu::TestContext&      testCtx,
1941                                                                                                  const std::string& name,
1942                                                                                                  const std::string& description,
1943                                                                                                  const deUint32         localSize,
1944                                                                                                  const tcu::IVec2&      imageSize);
1945
1946         void                            initPrograms                    (SourceCollections& sourceCollections) const;
1947         TestInstance*           createInstance                  (Context&                       context) const;
1948
1949 private:
1950         const deUint32          m_localSize;
1951         const tcu::IVec2        m_imageSize;
1952 };
1953
1954 class ImageAtomicOpTestInstance : public vkt::TestInstance
1955 {
1956 public:
1957                                                                         ImageAtomicOpTestInstance               (Context&                       context,
1958                                                                                                                                          const deUint32         localSize,
1959                                                                                                                                          const tcu::IVec2&      imageSize);
1960
1961         tcu::TestStatus                                 iterate                                                 (void);
1962
1963 private:
1964         const deUint32                                  m_localSize;
1965         const tcu::IVec2                                m_imageSize;
1966 };
1967
1968 ImageAtomicOpTest::ImageAtomicOpTest (tcu::TestContext&         testCtx,
1969                                                                           const std::string&    name,
1970                                                                           const std::string&    description,
1971                                                                           const deUint32                localSize,
1972                                                                           const tcu::IVec2&             imageSize)
1973         : TestCase              (testCtx, name, description)
1974         , m_localSize   (localSize)
1975         , m_imageSize   (imageSize)
1976 {
1977 }
1978
1979 void ImageAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
1980 {
1981         std::ostringstream src;
1982         src << "#version 310 es\n"
1983                 << "#extension GL_OES_shader_image_atomic : require\n"
1984                 << "layout (local_size_x = " << m_localSize << ") in;\n"
1985                 << "layout(binding = 1, r32ui) coherent uniform highp uimage2D u_dstImg;\n"
1986                 << "layout(binding = 0) readonly buffer Input {\n"
1987                 << "    uint values[" << (multiplyComponents(m_imageSize) * m_localSize) << "];\n"
1988                 << "} sb_in;\n\n"
1989                 << "void main (void) {\n"
1990                 << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1991                 << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1992                 << "\n"
1993                 << "    if (gl_LocalInvocationIndex == 0u)\n"
1994                 << "        imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1995                 << "    memoryBarrierImage();\n"
1996                 << "    barrier();\n"
1997                 << "    imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
1998                 << "}\n";
1999
2000         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
2001 }
2002
2003 TestInstance* ImageAtomicOpTest::createInstance (Context& context) const
2004 {
2005         return new ImageAtomicOpTestInstance(context, m_localSize, m_imageSize);
2006 }
2007
2008 ImageAtomicOpTestInstance::ImageAtomicOpTestInstance (Context& context, const deUint32 localSize, const tcu::IVec2& imageSize)
2009         : TestInstance  (context)
2010         , m_localSize   (localSize)
2011         , m_imageSize   (imageSize)
2012 {
2013 }
2014
2015 tcu::TestStatus ImageAtomicOpTestInstance::iterate (void)
2016 {
2017         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
2018         const VkDevice                  device                          = m_context.getDevice();
2019         const VkQueue                   queue                           = m_context.getUniversalQueue();
2020         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
2021         Allocator&                              allocator                       = m_context.getDefaultAllocator();
2022
2023         // Create an image
2024
2025         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
2026         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2027
2028         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2029         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2030
2031         // Input buffer
2032
2033         const deUint32 numInputValues = multiplyComponents(m_imageSize) * m_localSize;
2034         const VkDeviceSize inputBufferSizeBytes = sizeof(deUint32) * numInputValues;
2035
2036         const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(inputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
2037
2038         // Populate the input buffer with test data
2039         {
2040                 de::Random rnd(0x77238ac2);
2041                 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
2042                 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
2043                 for (deUint32 i = 0; i < numInputValues; ++i)
2044                         *bufferPtr++ = rnd.getUint32();
2045
2046                 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), inputBufferSizeBytes);
2047         }
2048
2049         // Create a buffer to store shader output (copied from image data)
2050
2051         const deUint32 imageArea = multiplyComponents(m_imageSize);
2052         const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32) * imageArea;
2053         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
2054
2055         // Create descriptor set
2056
2057         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2058                 DescriptorSetLayoutBuilder()
2059                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2060                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2061                 .build(vk, device));
2062
2063         const Unique<VkDescriptorPool> descriptorPool(
2064                 DescriptorPoolBuilder()
2065                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2066                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2067                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2068
2069         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2070
2071         // Set the bindings
2072
2073         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2074         const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, inputBufferSizeBytes);
2075
2076         DescriptorSetUpdateBuilder()
2077                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2078                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2079                 .update(vk, device);
2080
2081         // Perform the computation
2082         {
2083                 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
2084                 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2085                 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2086
2087                 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, inputBufferSizeBytes);
2088
2089                 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
2090                         VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
2091                         VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2092                         *image, subresourceRange);
2093
2094                 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2095
2096                 const void* preComputeBarriers[] = { &inputBufferPostHostWriteBarrier };
2097                 const void* preCopyBarriers[] = { &imagePreCopyBarrier };
2098                 const void* postCopyBarriers[] = { &outputBufferPostCopyBarrier };
2099
2100                 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
2101
2102                 // Prepare the command buffer
2103
2104                 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2105                 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2106
2107                 // Start recording commands
2108
2109                 beginCommandBuffer(vk, *cmdBuffer);
2110
2111                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2112                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2113
2114                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preComputeBarriers), preComputeBarriers);
2115                 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2116
2117                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preCopyBarriers), preCopyBarriers);
2118                 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &copyParams);
2119                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postCopyBarriers), postCopyBarriers);
2120
2121                 endCommandBuffer(vk, *cmdBuffer);
2122
2123                 // Wait for completion
2124
2125                 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2126         }
2127
2128         // Validate the results
2129
2130         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2131         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2132
2133         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2134         const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
2135
2136         for (deUint32 pixelNdx = 0; pixelNdx < imageArea; ++pixelNdx)
2137         {
2138                 const deUint32  res = bufferPtr[pixelNdx];
2139                 deUint32                ref = 0;
2140
2141                 for (deUint32 offs = 0; offs < m_localSize; ++offs)
2142                         ref += refBufferPtr[pixelNdx * m_localSize + offs];
2143
2144                 if (res != ref)
2145                 {
2146                         std::ostringstream msg;
2147                         msg << "Comparison failed for pixel " << pixelNdx;
2148                         return tcu::TestStatus::fail(msg.str());
2149                 }
2150         }
2151         return tcu::TestStatus::pass("Compute succeeded");
2152 }
2153
2154 class ImageBarrierTest : public vkt::TestCase
2155 {
2156 public:
2157                                                 ImageBarrierTest        (tcu::TestContext&      testCtx,
2158                                                                                         const std::string&      name,
2159                                                                                         const std::string&      description,
2160                                                                                         const tcu::IVec2&       imageSize);
2161
2162         void                            initPrograms            (SourceCollections& sourceCollections) const;
2163         TestInstance*           createInstance          (Context&                       context) const;
2164
2165 private:
2166         const tcu::IVec2        m_imageSize;
2167 };
2168
2169 class ImageBarrierTestInstance : public vkt::TestInstance
2170 {
2171 public:
2172                                                                         ImageBarrierTestInstance        (Context&                       context,
2173                                                                                                                                  const tcu::IVec2&      imageSize);
2174
2175         tcu::TestStatus                                 iterate                                         (void);
2176
2177 private:
2178         const tcu::IVec2                                m_imageSize;
2179 };
2180
2181 ImageBarrierTest::ImageBarrierTest (tcu::TestContext&   testCtx,
2182                                                                         const std::string&      name,
2183                                                                         const std::string&      description,
2184                                                                         const tcu::IVec2&       imageSize)
2185         : TestCase              (testCtx, name, description)
2186         , m_imageSize   (imageSize)
2187 {
2188 }
2189
2190 void ImageBarrierTest::initPrograms (SourceCollections& sourceCollections) const
2191 {
2192         sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
2193                 "#version 310 es\n"
2194                 "layout (local_size_x = 1) in;\n"
2195                 "layout(binding = 2) readonly uniform Constants {\n"
2196                 "    uint u_baseVal;\n"
2197                 "};\n"
2198                 "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_img;\n"
2199                 "void main (void) {\n"
2200                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
2201                 "    imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset + u_baseVal, 0, 0, 0));\n"
2202                 "}\n");
2203
2204         sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
2205                 "#version 310 es\n"
2206                 "layout (local_size_x = 1) in;\n"
2207                 "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_img;\n"
2208                 "layout(binding = 0) coherent buffer Output {\n"
2209                 "    uint sum;\n"
2210                 "};\n"
2211                 "void main (void) {\n"
2212                 "    uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
2213                 "    atomicAdd(sum, value);\n"
2214                 "}\n");
2215 }
2216
2217 TestInstance* ImageBarrierTest::createInstance (Context& context) const
2218 {
2219         return new ImageBarrierTestInstance(context, m_imageSize);
2220 }
2221
2222 ImageBarrierTestInstance::ImageBarrierTestInstance (Context& context, const tcu::IVec2& imageSize)
2223         : TestInstance  (context)
2224         , m_imageSize   (imageSize)
2225 {
2226 }
2227
2228 tcu::TestStatus ImageBarrierTestInstance::iterate (void)
2229 {
2230         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
2231         const VkDevice                  device                          = m_context.getDevice();
2232         const VkQueue                   queue                           = m_context.getUniversalQueue();
2233         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
2234         Allocator&                              allocator                       = m_context.getDefaultAllocator();
2235
2236         // Create an image used by both shaders
2237
2238         const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_STORAGE_BIT);
2239         const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2240
2241         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2242         const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2243
2244         // Create an output buffer
2245
2246         const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
2247         const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2248
2249         // Create a uniform buffer (to pass uniform constants)
2250
2251         const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
2252         const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
2253
2254         // Set the constants in the uniform buffer
2255
2256         const deUint32  baseValue = 127;
2257         {
2258                 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
2259                 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
2260                 uniformBufferPtr[0] = baseValue;
2261
2262                 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
2263         }
2264
2265         // Create descriptor set
2266
2267         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2268                 DescriptorSetLayoutBuilder()
2269                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2270                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2271                 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2272                 .build(vk, device));
2273
2274         const Unique<VkDescriptorPool> descriptorPool(
2275                 DescriptorPoolBuilder()
2276                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2277                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2278                 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2279                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2280
2281         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2282
2283         const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2284         const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
2285         const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2286         DescriptorSetUpdateBuilder()
2287                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
2288                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2289                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2290                 .update(vk, device);
2291
2292         // Perform the computation
2293
2294         const Unique<VkShaderModule>    shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
2295         const Unique<VkShaderModule>    shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
2296
2297         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2298         const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
2299         const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
2300
2301         const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
2302
2303         const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2304                 0u, 0u,
2305                 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
2306                 *image, subresourceRange);
2307
2308         const void* barriersBeforeCompute[] = { &writeUniformConstantsBarrier, &imageLayoutBarrier };
2309
2310         const VkImageMemoryBarrier imageBarrierBetweenShaders = makeImageMemoryBarrier(
2311                 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
2312                 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2313                 *image, subresourceRange);
2314
2315         const void* barriersAfterFirstShader[] = { &imageBarrierBetweenShaders };
2316
2317         const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2318         const void* barriersAfterCompute[] = { &afterComputeBarrier };
2319
2320         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2321         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2322
2323         // Start recording commands
2324
2325         beginCommandBuffer(vk, *cmdBuffer);
2326
2327         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
2328         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2329
2330         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriersBeforeCompute), barriersBeforeCompute);
2331
2332         vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2333         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriersAfterFirstShader), barriersAfterFirstShader);
2334
2335         // Switch to the second shader program
2336         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
2337
2338         vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2339         vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriersAfterCompute), barriersAfterCompute);
2340
2341         endCommandBuffer(vk, *cmdBuffer);
2342
2343         // Wait for completion
2344
2345         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2346
2347         // Validate the results
2348
2349         const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2350         invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2351
2352         const int               numValues = multiplyComponents(m_imageSize);
2353         const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2354         const deUint32  res = *bufferPtr;
2355         deUint32                ref = 0;
2356
2357         for (int ndx = 0; ndx < numValues; ++ndx)
2358                 ref += baseValue + ndx;
2359
2360         if (res != ref)
2361         {
2362                 std::ostringstream msg;
2363                 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
2364                 return tcu::TestStatus::fail(msg.str());
2365         }
2366         return tcu::TestStatus::pass("Compute succeeded");
2367 }
2368
2369 namespace EmptyShaderTest
2370 {
2371
2372 void createProgram (SourceCollections& dst)
2373 {
2374         dst.glslSources.add("comp") << glu::ComputeSource(
2375                 "#version 310 es\n"
2376                 "layout (local_size_x = 1) in;\n"
2377                 "void main (void) {}\n"
2378         );
2379 }
2380
2381 tcu::TestStatus createTest (Context& context)
2382 {
2383         const DeviceInterface&  vk                                      = context.getDeviceInterface();
2384         const VkDevice                  device                          = context.getDevice();
2385         const VkQueue                   queue                           = context.getUniversalQueue();
2386         const deUint32                  queueFamilyIndex        = context.getUniversalQueueFamilyIndex();
2387
2388         const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0u));
2389
2390         const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device));
2391         const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2392
2393         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2394         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2395
2396         // Start recording commands
2397
2398         beginCommandBuffer(vk, *cmdBuffer);
2399
2400         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2401
2402         const tcu::IVec3 workGroups(1, 1, 1);
2403         vk.cmdDispatch(*cmdBuffer, workGroups.x(), workGroups.y(), workGroups.z());
2404
2405         endCommandBuffer(vk, *cmdBuffer);
2406
2407         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2408
2409         return tcu::TestStatus::pass("Compute succeeded");
2410 }
2411
2412 } // EmptyShaderTest ns
2413 } // anonymous
2414
2415 tcu::TestCaseGroup* createBasicComputeShaderTests (tcu::TestContext& testCtx)
2416 {
2417         de::MovePtr<tcu::TestCaseGroup> basicComputeTests(new tcu::TestCaseGroup(testCtx, "basic", "Basic compute tests"));
2418
2419         addFunctionCaseWithPrograms(basicComputeTests.get(), "empty_shader", "Shader that does nothing", EmptyShaderTest::createProgram, EmptyShaderTest::createTest);
2420
2421         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_single_invocation",        "Copy from UBO to SSBO, inverting bits",        256,    tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2422         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_single_group",                     "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(2,1,4),      tcu::IVec3(1,1,1)));
2423         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_multiple_invocations",     "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(1,1,1),      tcu::IVec3(2,4,1)));
2424         basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx,      "ubo_to_ssbo_multiple_groups",          "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2425
2426         basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,       "copy_ssbo_single_invocation",          "Copy between SSBOs, inverting bits",   256,    tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2427         basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,       "copy_ssbo_multiple_invocations",       "Copy between SSBOs, inverting bits",   1024,   tcu::IVec3(1,1,1),      tcu::IVec3(2,4,1)));
2428         basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx,       "copy_ssbo_multiple_groups",            "Copy between SSBOs, inverting bits",   1024,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2429
2430         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_rw_single_invocation",                    "Read and write same SSBO",             256,    true,   tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2431         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_rw_multiple_groups",                              "Read and write same SSBO",             1024,   true,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2432         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_unsized_arr_single_invocation",   "Read and write same SSBO",             256,    false,  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2433         basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx,  "ssbo_unsized_arr_multiple_groups",             "Read and write same SSBO",             1024,   false,  tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2434
2435         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_arr_single_invocation",                 "Write to multiple SSBOs",      256,    true,   tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2436         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_arr_multiple_groups",                   "Write to multiple SSBOs",      1024,   true,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2437         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_unsized_arr_single_invocation", "Write to multiple SSBOs",      256,    false,  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2438         basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx,        "write_multiple_unsized_arr_multiple_groups",   "Write to multiple SSBOs",      1024,   false,  tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
2439
2440         basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,   "ssbo_local_barrier_single_invocation", "SSBO local barrier usage",     tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2441         basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,   "ssbo_local_barrier_single_group",              "SSBO local barrier usage",     tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
2442         basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx,   "ssbo_local_barrier_multiple_groups",   "SSBO local barrier usage",     tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
2443
2444         basicComputeTests->addChild(new SSBOBarrierTest(testCtx,        "ssbo_cmd_barrier_single",              "SSBO memory barrier usage",    tcu::IVec3(1,1,1)));
2445         basicComputeTests->addChild(new SSBOBarrierTest(testCtx,        "ssbo_cmd_barrier_multiple",    "SSBO memory barrier usage",    tcu::IVec3(11,5,7)));
2446
2447         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_single_invocation",         "Basic shared variable usage",  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2448         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_single_group",                      "Basic shared variable usage",  tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
2449         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_multiple_invocations",      "Basic shared variable usage",  tcu::IVec3(1,1,1),      tcu::IVec3(2,5,4)));
2450         basicComputeTests->addChild(new SharedVarTest(testCtx,  "shared_var_multiple_groups",           "Basic shared variable usage",  tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
2451
2452         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_single_invocation",           "Atomic operation with shared var",             tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
2453         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_single_group",                        "Atomic operation with shared var",             tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
2454         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_multiple_invocations",        "Atomic operation with shared var",             tcu::IVec3(1,1,1),      tcu::IVec3(2,5,4)));
2455         basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx,  "shared_atomic_op_multiple_groups",                     "Atomic operation with shared var",             tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
2456
2457         basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx,    "copy_image_to_ssbo_small",     "Image to SSBO copy",   tcu::IVec2(1,1),        tcu::IVec2(64,64)));
2458         basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx,    "copy_image_to_ssbo_large",     "Image to SSBO copy",   tcu::IVec2(2,4),        tcu::IVec2(512,512)));
2459
2460         basicComputeTests->addChild(new CopySSBOToImageTest(testCtx,    "copy_ssbo_to_image_small",     "SSBO to image copy",   tcu::IVec2(1, 1),       tcu::IVec2(64, 64)));
2461         basicComputeTests->addChild(new CopySSBOToImageTest(testCtx,    "copy_ssbo_to_image_large",     "SSBO to image copy",   tcu::IVec2(2, 4),       tcu::IVec2(512, 512)));
2462
2463         basicComputeTests->addChild(new ImageAtomicOpTest(testCtx,      "image_atomic_op_local_size_1", "Atomic operation with image",  1,      tcu::IVec2(64,64)));
2464         basicComputeTests->addChild(new ImageAtomicOpTest(testCtx,      "image_atomic_op_local_size_8", "Atomic operation with image",  8,      tcu::IVec2(64,64)));
2465
2466         basicComputeTests->addChild(new ImageBarrierTest(testCtx,       "image_barrier_single",         "Image barrier",        tcu::IVec2(1,1)));
2467         basicComputeTests->addChild(new ImageBarrierTest(testCtx,       "image_barrier_multiple",       "Image barrier",        tcu::IVec2(64,64)));
2468
2469         return basicComputeTests.release();
2470 }
2471
2472 } // compute
2473 } // vkt