resolve merge conflicts of eb48f49dd to oc-dev am: 7e37841e4f -s ours
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / image / vktImageAtomicOperationTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file  vktImageAtomicOperationTests.cpp
21  * \brief Image atomic operation tests
22  *//*--------------------------------------------------------------------*/
23
24 #include "vktImageAtomicOperationTests.hpp"
25
26 #include "deUniquePtr.hpp"
27 #include "deStringUtil.hpp"
28
29 #include "vktTestCaseUtil.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkImageUtil.hpp"
32 #include "vktImageTestsUtil.hpp"
33 #include "vkBuilderUtil.hpp"
34 #include "vkRef.hpp"
35 #include "vkRefUtil.hpp"
36 #include "vkTypeUtil.hpp"
37
38 #include "tcuTextureUtil.hpp"
39 #include "tcuTexture.hpp"
40 #include "tcuVectorType.hpp"
41
42 namespace vkt
43 {
44 namespace image
45 {
46 namespace
47 {
48
49 using namespace vk;
50 using namespace std;
51 using de::toString;
52
53 using tcu::TextureFormat;
54 using tcu::IVec2;
55 using tcu::IVec3;
56 using tcu::UVec3;
57 using tcu::Vec4;
58 using tcu::IVec4;
59 using tcu::UVec4;
60 using tcu::CubeFace;
61 using tcu::Texture1D;
62 using tcu::Texture2D;
63 using tcu::Texture3D;
64 using tcu::Texture2DArray;
65 using tcu::TextureCube;
66 using tcu::PixelBufferAccess;
67 using tcu::ConstPixelBufferAccess;
68 using tcu::Vector;
69 using tcu::TestContext;
70
//! Per-pixel invocation count. The shader generators in this file scale the grid width by this
//! factor when building the atomic argument expression, while the atomic coordinate wraps with
//! "gx % width", so several invocations address the same pixel.
enum { NUM_INVOCATIONS_PER_PIXEL = 5u };
75
//! Image atomic operations covered by these tests. Each value has a matching GLSL
//! imageAtomic* function (see getAtomicOperationShaderFuncName()).
enum AtomicOperation
{
	ATOMIC_OPERATION_ADD = 0,	//!< imageAtomicAdd
	ATOMIC_OPERATION_MIN,		//!< imageAtomicMin
	ATOMIC_OPERATION_MAX,		//!< imageAtomicMax
	ATOMIC_OPERATION_AND,		//!< imageAtomicAnd
	ATOMIC_OPERATION_OR,		//!< imageAtomicOr
	ATOMIC_OPERATION_XOR,		//!< imageAtomicXor
	ATOMIC_OPERATION_EXCHANGE,	//!< imageAtomicExchange

	ATOMIC_OPERATION_LAST		//!< Number of operations; not a valid operation itself.
};
88
89 static string getCoordStr (const ImageType              imageType,
90                                                    const std::string&   x,
91                                                    const std::string&   y,
92                                                    const std::string&   z)
93 {
94         switch (imageType)
95         {
96                 case IMAGE_TYPE_1D:
97                 case IMAGE_TYPE_BUFFER:
98                         return x;
99                 case IMAGE_TYPE_1D_ARRAY:
100                 case IMAGE_TYPE_2D:
101                         return string("ivec2(" + x + "," + y + ")");
102                 case IMAGE_TYPE_2D_ARRAY:
103                 case IMAGE_TYPE_3D:
104                 case IMAGE_TYPE_CUBE:
105                 case IMAGE_TYPE_CUBE_ARRAY:
106                         return string("ivec3(" + x + "," + y + "," + z + ")");
107                 default:
108                         DE_ASSERT(false);
109                         return DE_NULL;
110         }
111 }
112
113 static string getAtomicFuncArgumentShaderStr (const AtomicOperation     op,
114                                                                                           const string&                 x,
115                                                                                           const string&                 y,
116                                                                                           const string&                 z,
117                                                                                           const IVec3&                  gridSize)
118 {
119         switch (op)
120         {
121                 case ATOMIC_OPERATION_ADD:
122                 case ATOMIC_OPERATION_MIN:
123                 case ATOMIC_OPERATION_MAX:
124                 case ATOMIC_OPERATION_AND:
125                 case ATOMIC_OPERATION_OR:
126                 case ATOMIC_OPERATION_XOR:
127                         return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
128                 case ATOMIC_OPERATION_EXCHANGE:
129                         return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
130                 default:
131                         DE_ASSERT(false);
132                         return DE_NULL;
133         }
134 }
135
136 static string getAtomicOperationCaseName (const AtomicOperation op)
137 {
138         switch (op)
139         {
140                 case ATOMIC_OPERATION_ADD:                      return string("add");
141                 case ATOMIC_OPERATION_MIN:                      return string("min");
142                 case ATOMIC_OPERATION_MAX:                      return string("max");
143                 case ATOMIC_OPERATION_AND:                      return string("and");
144                 case ATOMIC_OPERATION_OR:                       return string("or");
145                 case ATOMIC_OPERATION_XOR:                      return string("xor");
146                 case ATOMIC_OPERATION_EXCHANGE:         return string("exchange");
147                 default:
148                         DE_ASSERT(false);
149                         return DE_NULL;
150         }
151 }
152
153 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
154 {
155         switch (op)
156         {
157                 case ATOMIC_OPERATION_ADD:                      return string("imageAtomicAdd");
158                 case ATOMIC_OPERATION_MIN:                      return string("imageAtomicMin");
159                 case ATOMIC_OPERATION_MAX:                      return string("imageAtomicMax");
160                 case ATOMIC_OPERATION_AND:                      return string("imageAtomicAnd");
161                 case ATOMIC_OPERATION_OR:                       return string("imageAtomicOr");
162                 case ATOMIC_OPERATION_XOR:                      return string("imageAtomicXor");
163                 case ATOMIC_OPERATION_EXCHANGE:         return string("imageAtomicExchange");
164                 default:
165                         DE_ASSERT(false);
166                         return DE_NULL;
167         }
168 }
169
170 static deInt32 getOperationInitialValue (const AtomicOperation op)
171 {
172         switch (op)
173         {
174                 // \note 18 is just an arbitrary small nonzero value.
175                 case ATOMIC_OPERATION_ADD:                      return 18;
176                 case ATOMIC_OPERATION_MIN:                      return (1 << 15) - 1;
177                 case ATOMIC_OPERATION_MAX:                      return 18;
178                 case ATOMIC_OPERATION_AND:                      return (1 << 15) - 1;
179                 case ATOMIC_OPERATION_OR:                       return 18;
180                 case ATOMIC_OPERATION_XOR:                      return 18;
181                 case ATOMIC_OPERATION_EXCHANGE:         return 18;
182                 default:
183                         DE_ASSERT(false);
184                         return -1;
185         }
186 }
187
188 static deInt32 getAtomicFuncArgument (const AtomicOperation op, const IVec3& invocationID, const IVec3& gridSize)
189 {
190         const int x = invocationID.x();
191         const int y = invocationID.y();
192         const int z = invocationID.z();
193
194         switch (op)
195         {
196                 // \note Fall-throughs.
197                 case ATOMIC_OPERATION_ADD:
198                 case ATOMIC_OPERATION_MIN:
199                 case ATOMIC_OPERATION_MAX:
200                 case ATOMIC_OPERATION_AND:
201                 case ATOMIC_OPERATION_OR:
202                 case ATOMIC_OPERATION_XOR:
203                         return x*x + y*y + z*z;
204                 case ATOMIC_OPERATION_EXCHANGE:
205                         return (z*gridSize.x() + x)*gridSize.y() + y;
206                 default:
207                         DE_ASSERT(false);
208                         return -1;
209         }
210 }
211
212 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
213 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
214 {
215         return  op == ATOMIC_OPERATION_ADD ||
216                         op == ATOMIC_OPERATION_MIN ||
217                         op == ATOMIC_OPERATION_MAX ||
218                         op == ATOMIC_OPERATION_AND ||
219                         op == ATOMIC_OPERATION_OR ||
220                         op == ATOMIC_OPERATION_XOR;
221 }
222
223 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
224 static deInt32 computeBinaryAtomicOperationResult (const AtomicOperation op, const deInt32 a, const deInt32 b)
225 {
226         switch (op)
227         {
228                 case ATOMIC_OPERATION_ADD:                      return a + b;
229                 case ATOMIC_OPERATION_MIN:                      return de::min(a, b);
230                 case ATOMIC_OPERATION_MAX:                      return de::max(a, b);
231                 case ATOMIC_OPERATION_AND:                      return a & b;
232                 case ATOMIC_OPERATION_OR:                       return a | b;
233                 case ATOMIC_OPERATION_XOR:                      return a ^ b;
234                 case ATOMIC_OPERATION_EXCHANGE:         return b;
235                 default:
236                         DE_ASSERT(false);
237                         return -1;
238         }
239 }
240
241 class BinaryAtomicEndResultCase : public vkt::TestCase
242 {
243 public:
244                                                                 BinaryAtomicEndResultCase  (tcu::TestContext&                   testCtx,
245                                                                                                                         const string&                           name,
246                                                                                                                         const string&                           description,
247                                                                                                                         const ImageType                         imageType,
248                                                                                                                         const tcu::UVec3&                       imageSize,
249                                                                                                                         const tcu::TextureFormat&       format,
250                                                                                                                         const AtomicOperation           operation,
251                                                                                                                         const glu::GLSLVersion          glslVersion);
252
253         void                                            initPrograms                       (SourceCollections&                  sourceCollections) const;
254         TestInstance*                           createInstance                     (Context&                                    context) const;
255 private:
256
257         const ImageType                         m_imageType;
258         const tcu::UVec3                        m_imageSize;
259         const tcu::TextureFormat        m_format;
260         const AtomicOperation           m_operation;
261         const glu::GLSLVersion          m_glslVersion;
262 };
263
264 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext&                 testCtx,
265                                                                                                           const string&                         name,
266                                                                                                           const string&                         description,
267                                                                                                           const ImageType                       imageType,
268                                                                                                           const tcu::UVec3&                     imageSize,
269                                                                                                           const tcu::TextureFormat&     format,
270                                                                                                           const AtomicOperation         operation,
271                                                                                                           const glu::GLSLVersion        glslVersion)
272         : TestCase              (testCtx, name, description)
273         , m_imageType   (imageType)
274         , m_imageSize   (imageSize)
275         , m_format              (format)
276         , m_operation   (operation)
277         , m_glslVersion (glslVersion)
278 {
279 }
280
281 void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
282 {
283         const string    versionDecl                             = glu::getGLSLVersionDeclaration(m_glslVersion);
284
285         const bool              uintFormat                              = isUintFormat(mapTextureFormat(m_format));
286         const bool              intFormat                               = isIntFormat(mapTextureFormat(m_format));
287         const UVec3             gridSize                                = getShaderGridSize(m_imageType, m_imageSize);
288         const string    atomicCoord                             = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
289
290         const string    atomicArgExpr                   = (uintFormat ? "uint" : intFormat ? "int" : "float")
291                                                                                         + getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
292
293         const string    atomicInvocation                = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + ", " + atomicArgExpr + ")";
294         const string    shaderImageFormatStr    = getShaderImageFormatQualifier(m_format);
295         const string    shaderImageTypeStr              = getShaderImageType(m_format, m_imageType);
296
297         string source = versionDecl + "\n"
298                                         "precision highp " + shaderImageTypeStr + ";\n"
299                                         "\n"
300                                         "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
301                                         "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
302                                         "\n"
303                                         "void main (void)\n"
304                                         "{\n"
305                                         "       int gx = int(gl_GlobalInvocationID.x);\n"
306                                         "       int gy = int(gl_GlobalInvocationID.y);\n"
307                                         "       int gz = int(gl_GlobalInvocationID.z);\n"
308                                         "       " + atomicInvocation + ";\n"
309                                         "}\n";
310
311         sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
312 }
313
314 class BinaryAtomicIntermValuesCase : public vkt::TestCase
315 {
316 public:
317                                                                 BinaryAtomicIntermValuesCase   (tcu::TestContext&                       testCtx,
318                                                                                                                                 const string&                           name,
319                                                                                                                                 const string&                           description,
320                                                                                                                                 const ImageType                         imageType,
321                                                                                                                                 const tcu::UVec3&                       imageSize,
322                                                                                                                                 const tcu::TextureFormat&       format,
323                                                                                                                                 const AtomicOperation           operation,
324                                                                                                                                 const glu::GLSLVersion          glslVersion);
325
326         void                                            initPrograms                               (SourceCollections&                  sourceCollections) const;
327         TestInstance*                           createInstance                             (Context&                                    context) const;
328 private:
329
330         const ImageType                         m_imageType;
331         const tcu::UVec3                        m_imageSize;
332         const tcu::TextureFormat        m_format;
333         const AtomicOperation           m_operation;
334         const glu::GLSLVersion          m_glslVersion;
335 };
336
337 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext&                        testCtx,
338                                                                                                                         const string&                   name,
339                                                                                                                         const string&                   description,
340                                                                                                                         const ImageType                 imageType,
341                                                                                                                         const tcu::UVec3&               imageSize,
342                                                                                                                         const TextureFormat&    format,
343                                                                                                                         const AtomicOperation   operation,
344                                                                                                                         const glu::GLSLVersion  glslVersion)
345         : TestCase              (testCtx, name, description)
346         , m_imageType   (imageType)
347         , m_imageSize   (imageSize)
348         , m_format              (format)
349         , m_operation   (operation)
350         , m_glslVersion (glslVersion)
351 {
352 }
353
354 void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
355 {
356         const string    versionDecl                             = glu::getGLSLVersionDeclaration(m_glslVersion);
357
358         const bool              uintFormat                              = isUintFormat(mapTextureFormat(m_format));
359         const bool              intFormat                               = isIntFormat(mapTextureFormat(m_format));
360         const string    colorVecTypeName                = string(uintFormat ? "u" : intFormat ? "i" : "") + "vec4";
361         const UVec3             gridSize                                = getShaderGridSize(m_imageType, m_imageSize);
362         const string    atomicCoord                             = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
363         const string    invocationCoord                 = getCoordStr(m_imageType, "gx", "gy", "gz");
364         const string    atomicArgExpr                   = (uintFormat ? "uint" : intFormat ? "int" : "float")
365                                                                                         + getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
366
367         const string    atomicInvocation                = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + ", " + atomicArgExpr + ")";
368         const string    shaderImageFormatStr    = getShaderImageFormatQualifier(m_format);
369         const string    shaderImageTypeStr              = getShaderImageType(m_format, m_imageType);
370
371         string source = versionDecl + "\n"
372                                         "precision highp " + shaderImageTypeStr + ";\n"
373                                         "\n"
374                                         "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
375                                         "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
376                                         "layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
377                                         "\n"
378                                         "void main (void)\n"
379                                         "{\n"
380                                         "       int gx = int(gl_GlobalInvocationID.x);\n"
381                                         "       int gy = int(gl_GlobalInvocationID.y);\n"
382                                         "       int gz = int(gl_GlobalInvocationID.z);\n"
383                                         "       imageStore(u_intermValuesImage, " + invocationCoord + ", " + colorVecTypeName + "(" + atomicInvocation + "));\n"
384                                         "}\n";
385
386         sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
387 }
388
389 class BinaryAtomicInstanceBase : public vkt::TestInstance
390 {
391 public:
392
393                                                                 BinaryAtomicInstanceBase (Context&                                              context,
394                                                                                                                   const string&                                 name,
395                                                                                                                   const ImageType                               imageType,
396                                                                                                                   const tcu::UVec3&                             imageSize,
397                                                                                                                   const TextureFormat&                  format,
398                                                                                                                   const AtomicOperation                 operation);
399
400         tcu::TestStatus                         iterate                                  (void);
401
402         virtual deUint32                        getOutputBufferSize              (void) const = 0;
403
404         virtual void                            prepareResources                 (void) = 0;
405         virtual void                            prepareDescriptors               (void) = 0;
406
407         virtual void                            commandsBeforeCompute    (const VkCommandBuffer                 cmdBuffer) const = 0;
408         virtual void                            commandsAfterCompute     (const VkCommandBuffer                 cmdBuffer) const = 0;
409
410         virtual bool                            verifyResult                     (Allocation&                                   outputBufferAllocation) const = 0;
411
412 protected:
413         const string                            m_name;
414         const ImageType                         m_imageType;
415         const tcu::UVec3                        m_imageSize;
416         const TextureFormat                     m_format;
417         const AtomicOperation           m_operation;
418
419         de::MovePtr<Buffer>                     m_outputBuffer;
420         Move<VkDescriptorPool>          m_descriptorPool;
421         Move<VkDescriptorSetLayout>     m_descriptorSetLayout;
422         Move<VkDescriptorSet>           m_descriptorSet;
423         de::MovePtr<Image>                      m_resultImage;
424         Move<VkImageView>                       m_resultImageView;
425 };
426
427 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context&                            context,
428                                                                                                         const string&                   name,
429                                                                                                         const ImageType                 imageType,
430                                                                                                         const tcu::UVec3&               imageSize,
431                                                                                                         const TextureFormat&    format,
432                                                                                                         const AtomicOperation   operation)
433         : vkt::TestInstance     (context)
434         , m_name                        (name)
435         , m_imageType           (imageType)
436         , m_imageSize           (imageSize)
437         , m_format                      (format)
438         , m_operation           (operation)
439 {
440 }
441
442 tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
443 {
444         const VkDevice                  device                          = m_context.getDevice();
445         const DeviceInterface&  deviceInterface         = m_context.getDeviceInterface();
446         const VkQueue                   queue                           = m_context.getUniversalQueue();
447         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
448         Allocator&                              allocator                       = m_context.getDefaultAllocator();
449         const VkDeviceSize              imageSizeInBytes        = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
450         const VkDeviceSize              outBuffSizeInBytes      = getOutputBufferSize();
451
452         const VkImageCreateInfo imageParams     =
453         {
454                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                                    // VkStructureType                      sType;
455                 DE_NULL,                                                                                                // const void*                          pNext;
456                 (m_imageType == IMAGE_TYPE_CUBE ||
457                  m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
458                  (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
459                  (VkImageCreateFlags)0u),                                                               // VkImageCreateFlags           flags;
460                 mapImageType(m_imageType),                                                              // VkImageType                          imageType;
461                 mapTextureFormat(m_format),                                                             // VkFormat                                     format;
462                 makeExtent3D(getLayerSize(m_imageType, m_imageSize)),   // VkExtent3D                           extent;
463                 1u,                                                                                                             // deUint32                                     mipLevels;
464                 getNumLayers(m_imageType, m_imageSize),                                 // deUint32                                     arrayLayers;
465                 VK_SAMPLE_COUNT_1_BIT,                                                                  // VkSampleCountFlagBits        samples;
466                 VK_IMAGE_TILING_OPTIMAL,                                                                // VkImageTiling                        tiling;
467                 VK_IMAGE_USAGE_STORAGE_BIT |
468                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
469                 VK_IMAGE_USAGE_TRANSFER_DST_BIT,                                                // VkImageUsageFlags            usage;
470                 VK_SHARING_MODE_EXCLUSIVE,                                                              // VkSharingMode                        sharingMode;
471                 0u,                                                                                                             // deUint32                                     queueFamilyIndexCount;
472                 DE_NULL,                                                                                                // const deUint32*                      pQueueFamilyIndices;
473                 VK_IMAGE_LAYOUT_UNDEFINED,                                                              // VkImageLayout                        initialLayout;
474         };
475
476         //Create the image that is going to store results of atomic operations
477         m_resultImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));
478
479         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
480
481         m_resultImageView = makeImageView(deviceInterface, device, m_resultImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
482
483         //Prepare the buffer with the initial data for the image
484         const Buffer inputBuffer(deviceInterface, device, allocator, makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
485
486         Allocation& inputBufferAllocation = inputBuffer.getAllocation();
487
488         //Prepare the initial data for the image
489         const tcu::IVec4 initialValue(getOperationInitialValue(m_operation));
490
491         tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
492         tcu::PixelBufferAccess inputPixelBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), inputBufferAllocation.getHostPtr());
493
494         for (deUint32 z = 0; z < gridSize.z(); z++)
495         for (deUint32 y = 0; y < gridSize.y(); y++)
496         for (deUint32 x = 0; x < gridSize.x(); x++)
497         {
498                 inputPixelBuffer.setPixel(initialValue, x, y, z);
499         }
500
501         flushMappedMemoryRange(deviceInterface, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), imageSizeInBytes);
502
503         // Create a buffer to store shader output copied from result image
504         m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface, device, allocator, makeBufferCreateInfo(outBuffSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
505
506         prepareResources();
507
508         prepareDescriptors();
509
510         // Create pipeline
511         const Unique<VkShaderModule>    shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
512         const Unique<VkPipelineLayout>  pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
513         const Unique<VkPipeline>                pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
514
515         // Create command buffer
516         const Unique<VkCommandPool>             cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
517         const Unique<VkCommandBuffer>   cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
518
519         beginCommandBuffer(deviceInterface, *cmdBuffer);
520
521         deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
522         deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
523
524         const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier     =
525                 makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT,
526                                                                 VK_ACCESS_TRANSFER_READ_BIT,
527                                                                 *inputBuffer,
528                                                                 0ull,
529                                                                 imageSizeInBytes);
530
531         const VkImageMemoryBarrier      resultImagePreCopyBarrier =
532                 makeImageMemoryBarrier( 0u,
533                                                                 VK_ACCESS_TRANSFER_WRITE_BIT,
534                                                                 VK_IMAGE_LAYOUT_UNDEFINED,
535                                                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
536                                                                 m_resultImage->get(),
537                                                                 subresourceRange);
538
539         deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 1u, &inputBufferPostHostWriteBarrier, 1u, &resultImagePreCopyBarrier);
540
541         const VkBufferImageCopy         bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));
542
543         deviceInterface.cmdCopyBufferToImage(*cmdBuffer, *inputBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &bufferImageCopyParams);
544
545         const VkImageMemoryBarrier      resultImagePostCopyBarrier      =
546                 makeImageMemoryBarrier( VK_ACCESS_TRANSFER_WRITE_BIT,
547                                                                 VK_ACCESS_SHADER_READ_BIT,
548                                                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
549                                                                 VK_IMAGE_LAYOUT_GENERAL,
550                                                                 m_resultImage->get(),
551                                                                 subresourceRange);
552
553         deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostCopyBarrier);
554
555         commandsBeforeCompute(*cmdBuffer);
556
557         deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
558
559         commandsAfterCompute(*cmdBuffer);
560
561         const VkBufferMemoryBarrier     outputBufferPreHostReadBarrier
562                 = makeBufferMemoryBarrier(      VK_ACCESS_TRANSFER_WRITE_BIT,
563                                                                         VK_ACCESS_HOST_READ_BIT,
564                                                                         m_outputBuffer->get(),
565                                                                         0ull,
566                                                                         outBuffSizeInBytes);
567
568         deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, 0u, DE_NULL, 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
569
570         endCommandBuffer(deviceInterface, *cmdBuffer);
571
572         submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer);
573
574         Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();
575
576         invalidateMappedMemoryRange(deviceInterface, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outBuffSizeInBytes);
577
578         if (verifyResult(outputBufferAllocation))
579                 return tcu::TestStatus::pass("Comparison succeeded");
580         else
581                 return tcu::TestStatus::fail("Comparison failed");
582 }
583
584 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
585 {
586 public:
587
588                                                 BinaryAtomicEndResultInstance  (Context&                                context,
589                                                                                                                 const string&                   name,
590                                                                                                                 const ImageType                 imageType,
591                                                                                                                 const tcu::UVec3&               imageSize,
592                                                                                                                 const TextureFormat&    format,
593                                                                                                                 const AtomicOperation   operation)
594                                                         : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}
595
596         virtual deUint32        getOutputBufferSize                        (void) const;
597
598         virtual void            prepareResources                           (void) {}
599         virtual void            prepareDescriptors                         (void);
600
601         virtual void            commandsBeforeCompute              (const VkCommandBuffer) const {}
602         virtual void            commandsAfterCompute               (const VkCommandBuffer       cmdBuffer) const;
603
604         virtual bool            verifyResult                               (Allocation&                         outputBufferAllocation) const;
605 };
606
607 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
608 {
609         return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
610 }
611
612 void BinaryAtomicEndResultInstance::prepareDescriptors (void)
613 {
614         const VkDevice                  device                  = m_context.getDevice();
615         const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
616
617         m_descriptorSetLayout =
618                 DescriptorSetLayoutBuilder()
619                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
620                 .build(deviceInterface, device);
621
622         m_descriptorPool =
623                 DescriptorPoolBuilder()
624                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
625                 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
626
627         m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
628
629         const VkDescriptorImageInfo     descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
630
631         DescriptorSetUpdateBuilder()
632                 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
633                 .update(deviceInterface, device);
634 }
635
636 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
637 {
638         const DeviceInterface&                  deviceInterface         = m_context.getDeviceInterface();
639         const VkImageSubresourceRange   subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
640
641         const VkImageMemoryBarrier      resultImagePostDispatchBarrier =
642                 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
643                                                                 VK_ACCESS_TRANSFER_READ_BIT,
644                                                                 VK_IMAGE_LAYOUT_GENERAL,
645                                                                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
646                                                                 m_resultImage->get(),
647                                                                 subresourceRange);
648
649         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostDispatchBarrier);
650
651         const VkBufferImageCopy         bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));
652
653         deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
654 }
655
656 bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation) const
657 {
658         const UVec3     gridSize                        = getShaderGridSize(m_imageType, m_imageSize);
659         const IVec3 extendedGridSize    = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
660
661         tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
662
663         for (deInt32 z = 0; z < resultBuffer.getDepth();  z++)
664         for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
665         for (deInt32 x = 0; x < resultBuffer.getWidth();  x++)
666         {
667                 deInt32 resultValue = resultBuffer.getPixelInt(x, y, z).x();
668
669                 if (isOrderIndependentAtomicOperation(m_operation))
670                 {
671                         deInt32 reference = getOperationInitialValue(m_operation);
672
673                         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
674                         {
675                                 const IVec3 gid(x + i*gridSize.x(), y, z);
676                                 reference = computeBinaryAtomicOperationResult(m_operation, reference, getAtomicFuncArgument(m_operation, gid, extendedGridSize));
677                         }
678
679                         if (resultValue != reference)
680                                 return false;
681                 }
682                 else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
683                 {
684                         // Check if the end result equals one of the atomic args.
685                         bool matchFound = false;
686
687                         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
688                         {
689                                 const IVec3 gid(x + i*gridSize.x(), y, z);
690                                 matchFound = (resultValue == getAtomicFuncArgument(m_operation, gid, extendedGridSize));
691                         }
692
693                         if (!matchFound)
694                                 return false;
695                 }
696                 else
697                         DE_ASSERT(false);
698         }
699         return true;
700 }
701
702 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
703 {
704         return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
705 }
706
707 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
708 {
709 public:
710
711                                                 BinaryAtomicIntermValuesInstance   (Context&                            context,
712                                                                                                                         const string&                   name,
713                                                                                                                         const ImageType                 imageType,
714                                                                                                                         const tcu::UVec3&               imageSize,
715                                                                                                                         const TextureFormat&    format,
716                                                                                                                         const AtomicOperation   operation)
717                                                         : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}
718
719         virtual deUint32        getOutputBufferSize                                (void) const;
720
721         virtual void            prepareResources                                   (void);
722         virtual void            prepareDescriptors                                 (void);
723
724         virtual void            commandsBeforeCompute                      (const VkCommandBuffer       cmdBuffer) const;
725         virtual void            commandsAfterCompute                       (const VkCommandBuffer       cmdBuffer) const;
726
727         virtual bool            verifyResult                                       (Allocation&                         outputBufferAllocation) const;
728
729 protected:
730
731         bool                            verifyRecursive                                    (const deInt32                       index,
732                                                                                                                         const deInt32                   valueSoFar,
733                                                                                                                         bool                                    argsUsed[NUM_INVOCATIONS_PER_PIXEL],
734                                                                                                                         const deInt32                   atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
735                                                                                                                         const deInt32                   resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
736         de::MovePtr<Image>      m_intermResultsImage;
737         Move<VkImageView>       m_intermResultsImageView;
738 };
739
740 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
741 {
742         return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
743 }
744
745 void BinaryAtomicIntermValuesInstance::prepareResources (void)
746 {
747         const VkDevice                  device                  = m_context.getDevice();
748         const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
749         Allocator&                              allocator               = m_context.getDefaultAllocator();
750
751         const UVec3 layerSize                   = getLayerSize(m_imageType, m_imageSize);
752         const bool  isCubeBasedImage    = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
753         const UVec3 extendedLayerSize   = isCubeBasedImage      ? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
754                                                                                                                 : UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
755
756         const VkImageCreateInfo imageParams =
757         {
758                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,            // VkStructureType                      sType;
759                 DE_NULL,                                                                        // const void*                          pNext;
760                 (m_imageType == IMAGE_TYPE_CUBE ||
761                  m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
762                  (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
763                  (VkImageCreateFlags)0u),                                       // VkImageCreateFlags           flags;
764                 mapImageType(m_imageType),                                      // VkImageType                          imageType;
765                 mapTextureFormat(m_format),                                     // VkFormat                                     format;
766                 makeExtent3D(extendedLayerSize),                        // VkExtent3D                           extent;
767                 1u,                                                                                     // deUint32                                     mipLevels;
768                 getNumLayers(m_imageType, m_imageSize),         // deUint32                                     arrayLayers;
769                 VK_SAMPLE_COUNT_1_BIT,                                          // VkSampleCountFlagBits        samples;
770                 VK_IMAGE_TILING_OPTIMAL,                                        // VkImageTiling                        tiling;
771                 VK_IMAGE_USAGE_STORAGE_BIT |
772                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
773                 VK_SHARING_MODE_EXCLUSIVE,                                      // VkSharingMode                        sharingMode;
774                 0u,                                                                                     // deUint32                                     queueFamilyIndexCount;
775                 DE_NULL,                                                                        // const deUint32*                      pQueueFamilyIndices;
776                 VK_IMAGE_LAYOUT_UNDEFINED,                                      // VkImageLayout                        initialLayout;
777         };
778
779         m_intermResultsImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));
780
781         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
782
783         m_intermResultsImageView = makeImageView(deviceInterface, device, m_intermResultsImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
784 }
785
786 void BinaryAtomicIntermValuesInstance::prepareDescriptors (void)
787 {
788         const VkDevice                  device                  = m_context.getDevice();
789         const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
790
791         m_descriptorSetLayout =
792                 DescriptorSetLayoutBuilder()
793                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
794                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
795                 .build(deviceInterface, device);
796
797         m_descriptorPool =
798                 DescriptorPoolBuilder()
799                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u)
800                 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
801
802         m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
803
804         const VkDescriptorImageInfo     descResultImageInfo                     = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
805         const VkDescriptorImageInfo     descIntermResultsImageInfo      = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
806
807         DescriptorSetUpdateBuilder()
808                 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
809                 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descIntermResultsImageInfo)
810                 .update(deviceInterface, device);
811 }
812
813 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
814 {
815         const DeviceInterface&                  deviceInterface         = m_context.getDeviceInterface();
816         const VkImageSubresourceRange   subresourceRange        = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
817
818         const VkImageMemoryBarrier      imagePreDispatchBarrier =
819                 makeImageMemoryBarrier( 0u,
820                                                                 VK_ACCESS_SHADER_WRITE_BIT,
821                                                                 VK_IMAGE_LAYOUT_UNDEFINED,
822                                                                 VK_IMAGE_LAYOUT_GENERAL,
823                                                                 m_intermResultsImage->get(),
824                                                                 subresourceRange);
825
826         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
827 }
828
829 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
830 {
831         const DeviceInterface&                  deviceInterface         = m_context.getDeviceInterface();
832         const VkImageSubresourceRange   subresourceRange        = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
833
834         const VkImageMemoryBarrier      imagePostDispatchBarrier =
835                 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
836                                                                 VK_ACCESS_TRANSFER_READ_BIT,
837                                                                 VK_IMAGE_LAYOUT_GENERAL,
838                                                                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
839                                                                 m_intermResultsImage->get(),
840                                                                 subresourceRange);
841
842         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
843
844         const UVec3                                     layerSize                               = getLayerSize(m_imageType, m_imageSize);
845         const UVec3                                     extendedLayerSize               = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
846         const VkBufferImageCopy         bufferImageCopyParams   = makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
847
848         deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
849 }
850
851 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation&        outputBufferAllocation) const
852 {
853         const UVec3     gridSize                 = getShaderGridSize(m_imageType, m_imageSize);
854         const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
855
856         tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
857
858         for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
859         for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
860         for (deUint32 x = 0; x < gridSize.x(); x++)
861         {
862                 deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL];
863                 deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
864                 bool    argsUsed[NUM_INVOCATIONS_PER_PIXEL];
865
866                 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
867                 {
868                         IVec3 gid(x + i*gridSize.x(), y, z);
869
870                         resultValues[i] = resultBuffer.getPixelInt(gid.x(), gid.y(), gid.z()).x();
871                         atomicArgs[i]   = getAtomicFuncArgument(m_operation, gid, extendedGridSize);
872                         argsUsed[i]             = false;
873                 }
874
875                 // Verify that the return values form a valid sequence.
876                 if (!verifyRecursive(0, getOperationInitialValue(m_operation), argsUsed, atomicArgs, resultValues))
877                 {
878                         return false;
879                 }
880         }
881
882         return true;
883 }
884
885 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32   index,
886                                                                                                                 const deInt32   valueSoFar,
887                                                                                                                 bool                    argsUsed[NUM_INVOCATIONS_PER_PIXEL],
888                                                                                                                 const deInt32   atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
889                                                                                                                 const deInt32   resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
890 {
891         if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
892                 return true;
893
894         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
895         {
896                 if (!argsUsed[i] && resultValues[i] == valueSoFar)
897                 {
898                         argsUsed[i] = true;
899
900                         if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
901                         {
902                                 return true;
903                         }
904
905                         argsUsed[i] = false;
906                 }
907         }
908
909         return false;
910 }
911
912 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
913 {
914         return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
915 }
916
917 } // anonymous ns
918
919 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
920 {
921         de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
922
923         struct ImageParams
924         {
925                 ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
926                         : m_imageType   (imageType)
927                         , m_imageSize   (imageSize)
928                 {
929                 }
930                 const ImageType         m_imageType;
931                 const tcu::UVec3        m_imageSize;
932         };
933
934         static const ImageParams imageParamsArray[] =
935         {
936                 ImageParams(IMAGE_TYPE_1D,                      tcu::UVec3(64u, 1u, 1u)),
937                 ImageParams(IMAGE_TYPE_1D_ARRAY,        tcu::UVec3(64u, 1u, 8u)),
938                 ImageParams(IMAGE_TYPE_2D,                      tcu::UVec3(64u, 64u, 1u)),
939                 ImageParams(IMAGE_TYPE_2D_ARRAY,        tcu::UVec3(64u, 64u, 8u)),
940                 ImageParams(IMAGE_TYPE_3D,                      tcu::UVec3(64u, 64u, 8u)),
941                 ImageParams(IMAGE_TYPE_CUBE,            tcu::UVec3(64u, 64u, 1u)),
942                 ImageParams(IMAGE_TYPE_CUBE_ARRAY,      tcu::UVec3(64u, 64u, 2u))
943         };
944
945         static const tcu::TextureFormat formats[] =
946         {
947                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
948                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32)
949         };
950
951         for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
952         {
953                 const AtomicOperation operation = (AtomicOperation)operationI;
954
955                 de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
956
957                 for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
958                 {
959                         const ImageType  imageType = imageParamsArray[imageTypeNdx].m_imageType;
960                         const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
961
962                         de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
963
964                         for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
965                         {
966                                 const TextureFormat&    format          = formats[formatNdx];
967                                 const std::string               formatName      = getShaderImageFormatQualifier(format);
968
969                                 //!< Atomic case checks the end result of the operations, and not the intermediate return values
970                                 const string caseEndResult = formatName + "_end_result";
971                                 imageTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
972
973                                 //!< Atomic case checks the return values of the atomic function and not the end result.
974                                 const string caseIntermValues = formatName + "_intermediate_values";
975                                 imageTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
976                         }
977
978                         operationGroup->addChild(imageTypeGroup.release());
979                 }
980
981                 imageAtomicOperationsTests->addChild(operationGroup.release());
982         }
983
984         return imageAtomicOperationsTests.release();
985 }
986
987 } // image
988 } // vkt