Fix stageMask (0x1) usage in renderpass, image and texture tests
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / image / vktImageAtomicOperationTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file  vktImageAtomicOperationTests.cpp
21  * \brief Image atomic operation tests
22  *//*--------------------------------------------------------------------*/
23
24 #include "vktImageAtomicOperationTests.hpp"
25
26 #include "deUniquePtr.hpp"
27 #include "deStringUtil.hpp"
28
29 #include "vktTestCaseUtil.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkImageUtil.hpp"
32 #include "vktImageTestsUtil.hpp"
33 #include "vkBuilderUtil.hpp"
34 #include "vkRef.hpp"
35 #include "vkRefUtil.hpp"
36 #include "vkTypeUtil.hpp"
37
38 #include "tcuTextureUtil.hpp"
39 #include "tcuTexture.hpp"
40 #include "tcuVectorType.hpp"
41
42 namespace vkt
43 {
44 namespace image
45 {
46 namespace
47 {
48
49 using namespace vk;
50 using namespace std;
51 using de::toString;
52
53 using tcu::TextureFormat;
54 using tcu::IVec2;
55 using tcu::IVec3;
56 using tcu::UVec3;
57 using tcu::Vec4;
58 using tcu::IVec4;
59 using tcu::UVec4;
60 using tcu::CubeFace;
61 using tcu::Texture1D;
62 using tcu::Texture2D;
63 using tcu::Texture3D;
64 using tcu::Texture2DArray;
65 using tcu::TextureCube;
66 using tcu::PixelBufferAccess;
67 using tcu::ConstPixelBufferAccess;
68 using tcu::Vector;
69 using tcu::TestContext;
70
//! Number of shader invocations that address each image pixel.
//! The shader generators in this file size their invocation grid as this many
//! times the image grid width and wrap the X invocation index back onto the
//! image width ("gx % width").
enum { NUM_INVOCATIONS_PER_PIXEL = 5u };
75
//! Image atomic operations exercised by the tests in this file.
enum AtomicOperation
{
	ATOMIC_OPERATION_ADD = 0,			//!< imageAtomicAdd
	ATOMIC_OPERATION_MIN,				//!< imageAtomicMin
	ATOMIC_OPERATION_MAX,				//!< imageAtomicMax
	ATOMIC_OPERATION_AND,				//!< imageAtomicAnd
	ATOMIC_OPERATION_OR,				//!< imageAtomicOr
	ATOMIC_OPERATION_XOR,				//!< imageAtomicXor
	ATOMIC_OPERATION_EXCHANGE,			//!< imageAtomicExchange
	ATOMIC_OPERATION_COMPARE_EXCHANGE,	//!< imageAtomicCompSwap

	ATOMIC_OPERATION_LAST				//!< Sentinel: one past the last operation.
};
89
90 static string getCoordStr (const ImageType              imageType,
91                                                    const std::string&   x,
92                                                    const std::string&   y,
93                                                    const std::string&   z)
94 {
95         switch (imageType)
96         {
97                 case IMAGE_TYPE_1D:
98                 case IMAGE_TYPE_BUFFER:
99                         return x;
100                 case IMAGE_TYPE_1D_ARRAY:
101                 case IMAGE_TYPE_2D:
102                         return string("ivec2(" + x + "," + y + ")");
103                 case IMAGE_TYPE_2D_ARRAY:
104                 case IMAGE_TYPE_3D:
105                 case IMAGE_TYPE_CUBE:
106                 case IMAGE_TYPE_CUBE_ARRAY:
107                         return string("ivec3(" + x + "," + y + "," + z + ")");
108                 default:
109                         DE_ASSERT(false);
110                         return DE_NULL;
111         }
112 }
113
114 static string getAtomicFuncArgumentShaderStr (const AtomicOperation     op,
115                                                                                           const string&                 x,
116                                                                                           const string&                 y,
117                                                                                           const string&                 z,
118                                                                                           const IVec3&                  gridSize)
119 {
120         switch (op)
121         {
122                 case ATOMIC_OPERATION_ADD:
123                 case ATOMIC_OPERATION_MIN:
124                 case ATOMIC_OPERATION_MAX:
125                 case ATOMIC_OPERATION_AND:
126                 case ATOMIC_OPERATION_OR:
127                 case ATOMIC_OPERATION_XOR:
128                         return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
129                 case ATOMIC_OPERATION_EXCHANGE:
130                 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
131                         return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
132                 default:
133                         DE_ASSERT(false);
134                         return DE_NULL;
135         }
136 }
137
138 static string getAtomicOperationCaseName (const AtomicOperation op)
139 {
140         switch (op)
141         {
142                 case ATOMIC_OPERATION_ADD:                              return string("add");
143                 case ATOMIC_OPERATION_MIN:                              return string("min");
144                 case ATOMIC_OPERATION_MAX:                              return string("max");
145                 case ATOMIC_OPERATION_AND:                              return string("and");
146                 case ATOMIC_OPERATION_OR:                               return string("or");
147                 case ATOMIC_OPERATION_XOR:                              return string("xor");
148                 case ATOMIC_OPERATION_EXCHANGE:                 return string("exchange");
149                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("compare_exchange");
150                 default:
151                         DE_ASSERT(false);
152                         return DE_NULL;
153         }
154 }
155
156 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
157 {
158         switch (op)
159         {
160                 case ATOMIC_OPERATION_ADD:                              return string("imageAtomicAdd");
161                 case ATOMIC_OPERATION_MIN:                              return string("imageAtomicMin");
162                 case ATOMIC_OPERATION_MAX:                              return string("imageAtomicMax");
163                 case ATOMIC_OPERATION_AND:                              return string("imageAtomicAnd");
164                 case ATOMIC_OPERATION_OR:                               return string("imageAtomicOr");
165                 case ATOMIC_OPERATION_XOR:                              return string("imageAtomicXor");
166                 case ATOMIC_OPERATION_EXCHANGE:                 return string("imageAtomicExchange");
167                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("imageAtomicCompSwap");
168                 default:
169                         DE_ASSERT(false);
170                         return DE_NULL;
171         }
172 }
173
174 static deInt32 getOperationInitialValue (const AtomicOperation op)
175 {
176         switch (op)
177         {
178                 // \note 18 is just an arbitrary small nonzero value.
179                 case ATOMIC_OPERATION_ADD:                              return 18;
180                 case ATOMIC_OPERATION_MIN:                              return (1 << 15) - 1;
181                 case ATOMIC_OPERATION_MAX:                              return 18;
182                 case ATOMIC_OPERATION_AND:                              return (1 << 15) - 1;
183                 case ATOMIC_OPERATION_OR:                               return 18;
184                 case ATOMIC_OPERATION_XOR:                              return 18;
185                 case ATOMIC_OPERATION_EXCHANGE:                 return 18;
186                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 18;
187                 default:
188                         DE_ASSERT(false);
189                         return -1;
190         }
191 }
192
193 static deInt32 getAtomicFuncArgument (const AtomicOperation op, const IVec3& invocationID, const IVec3& gridSize)
194 {
195         const int x = invocationID.x();
196         const int y = invocationID.y();
197         const int z = invocationID.z();
198
199         switch (op)
200         {
201                 // \note Fall-throughs.
202                 case ATOMIC_OPERATION_ADD:
203                 case ATOMIC_OPERATION_MIN:
204                 case ATOMIC_OPERATION_MAX:
205                 case ATOMIC_OPERATION_AND:
206                 case ATOMIC_OPERATION_OR:
207                 case ATOMIC_OPERATION_XOR:
208                         return x*x + y*y + z*z;
209                 case ATOMIC_OPERATION_EXCHANGE:
210                 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
211                         return (z*gridSize.x() + x)*gridSize.y() + y;
212                 default:
213                         DE_ASSERT(false);
214                         return -1;
215         }
216 }
217
218 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
219 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
220 {
221         return  op == ATOMIC_OPERATION_ADD ||
222                         op == ATOMIC_OPERATION_MIN ||
223                         op == ATOMIC_OPERATION_MAX ||
224                         op == ATOMIC_OPERATION_AND ||
225                         op == ATOMIC_OPERATION_OR ||
226                         op == ATOMIC_OPERATION_XOR;
227 }
228
229 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
230 static deInt32 computeBinaryAtomicOperationResult (const AtomicOperation op, const deInt32 a, const deInt32 b)
231 {
232         switch (op)
233         {
234                 case ATOMIC_OPERATION_ADD:                              return a + b;
235                 case ATOMIC_OPERATION_MIN:                              return de::min(a, b);
236                 case ATOMIC_OPERATION_MAX:                              return de::max(a, b);
237                 case ATOMIC_OPERATION_AND:                              return a & b;
238                 case ATOMIC_OPERATION_OR:                               return a | b;
239                 case ATOMIC_OPERATION_XOR:                              return a ^ b;
240                 case ATOMIC_OPERATION_EXCHANGE:                 return b;
241                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return (a == 18) ? b : a;
242                 default:
243                         DE_ASSERT(false);
244                         return -1;
245         }
246 }
247
248 class BinaryAtomicEndResultCase : public vkt::TestCase
249 {
250 public:
251                                                                 BinaryAtomicEndResultCase  (tcu::TestContext&                   testCtx,
252                                                                                                                         const string&                           name,
253                                                                                                                         const string&                           description,
254                                                                                                                         const ImageType                         imageType,
255                                                                                                                         const tcu::UVec3&                       imageSize,
256                                                                                                                         const tcu::TextureFormat&       format,
257                                                                                                                         const AtomicOperation           operation,
258                                                                                                                         const glu::GLSLVersion          glslVersion);
259
260         void                                            initPrograms                       (SourceCollections&                  sourceCollections) const;
261         TestInstance*                           createInstance                     (Context&                                    context) const;
262 private:
263
264         const ImageType                         m_imageType;
265         const tcu::UVec3                        m_imageSize;
266         const tcu::TextureFormat        m_format;
267         const AtomicOperation           m_operation;
268         const glu::GLSLVersion          m_glslVersion;
269 };
270
271 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext&                 testCtx,
272                                                                                                           const string&                         name,
273                                                                                                           const string&                         description,
274                                                                                                           const ImageType                       imageType,
275                                                                                                           const tcu::UVec3&                     imageSize,
276                                                                                                           const tcu::TextureFormat&     format,
277                                                                                                           const AtomicOperation         operation,
278                                                                                                           const glu::GLSLVersion        glslVersion)
279         : TestCase              (testCtx, name, description)
280         , m_imageType   (imageType)
281         , m_imageSize   (imageSize)
282         , m_format              (format)
283         , m_operation   (operation)
284         , m_glslVersion (glslVersion)
285 {
286 }
287
288 void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
289 {
290         const string    versionDecl                             = glu::getGLSLVersionDeclaration(m_glslVersion);
291
292         const bool              uintFormat                              = isUintFormat(mapTextureFormat(m_format));
293         const bool              intFormat                               = isIntFormat(mapTextureFormat(m_format));
294         const UVec3             gridSize                                = getShaderGridSize(m_imageType, m_imageSize);
295         const string    atomicCoord                             = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
296
297         const string    atomicArgExpr                   = (uintFormat ? "uint" : intFormat ? "int" : "float")
298                                                                                         + getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
299
300         const string    compareExchangeStr              = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ? ", 18" + string(uintFormat ? "u" : "") : "";
301         const string    atomicInvocation                = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
302         const string    shaderImageFormatStr    = getShaderImageFormatQualifier(m_format);
303         const string    shaderImageTypeStr              = getShaderImageType(m_format, m_imageType);
304
305         string source = versionDecl + "\n"
306                                         "precision highp " + shaderImageTypeStr + ";\n"
307                                         "\n"
308                                         "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
309                                         "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
310                                         "\n"
311                                         "void main (void)\n"
312                                         "{\n"
313                                         "       int gx = int(gl_GlobalInvocationID.x);\n"
314                                         "       int gy = int(gl_GlobalInvocationID.y);\n"
315                                         "       int gz = int(gl_GlobalInvocationID.z);\n"
316                                         "       " + atomicInvocation + ";\n"
317                                         "}\n";
318
319         sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
320 }
321
322 class BinaryAtomicIntermValuesCase : public vkt::TestCase
323 {
324 public:
325                                                                 BinaryAtomicIntermValuesCase   (tcu::TestContext&                       testCtx,
326                                                                                                                                 const string&                           name,
327                                                                                                                                 const string&                           description,
328                                                                                                                                 const ImageType                         imageType,
329                                                                                                                                 const tcu::UVec3&                       imageSize,
330                                                                                                                                 const tcu::TextureFormat&       format,
331                                                                                                                                 const AtomicOperation           operation,
332                                                                                                                                 const glu::GLSLVersion          glslVersion);
333
334         void                                            initPrograms                               (SourceCollections&                  sourceCollections) const;
335         TestInstance*                           createInstance                             (Context&                                    context) const;
336 private:
337
338         const ImageType                         m_imageType;
339         const tcu::UVec3                        m_imageSize;
340         const tcu::TextureFormat        m_format;
341         const AtomicOperation           m_operation;
342         const glu::GLSLVersion          m_glslVersion;
343 };
344
345 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext&                        testCtx,
346                                                                                                                         const string&                   name,
347                                                                                                                         const string&                   description,
348                                                                                                                         const ImageType                 imageType,
349                                                                                                                         const tcu::UVec3&               imageSize,
350                                                                                                                         const TextureFormat&    format,
351                                                                                                                         const AtomicOperation   operation,
352                                                                                                                         const glu::GLSLVersion  glslVersion)
353         : TestCase              (testCtx, name, description)
354         , m_imageType   (imageType)
355         , m_imageSize   (imageSize)
356         , m_format              (format)
357         , m_operation   (operation)
358         , m_glslVersion (glslVersion)
359 {
360 }
361
362 void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
363 {
364         const string    versionDecl                             = glu::getGLSLVersionDeclaration(m_glslVersion);
365
366         const bool              uintFormat                              = isUintFormat(mapTextureFormat(m_format));
367         const bool              intFormat                               = isIntFormat(mapTextureFormat(m_format));
368         const string    colorVecTypeName                = string(uintFormat ? "u" : intFormat ? "i" : "") + "vec4";
369         const UVec3             gridSize                                = getShaderGridSize(m_imageType, m_imageSize);
370         const string    atomicCoord                             = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
371         const string    invocationCoord                 = getCoordStr(m_imageType, "gx", "gy", "gz");
372         const string    atomicArgExpr                   = (uintFormat ? "uint" : intFormat ? "int" : "float")
373                                                                                         + getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
374
375         const string    compareExchangeStr              = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ? ", 18" + string(uintFormat ? "u" : "")  : "";
376         const string    atomicInvocation                = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
377         const string    shaderImageFormatStr    = getShaderImageFormatQualifier(m_format);
378         const string    shaderImageTypeStr              = getShaderImageType(m_format, m_imageType);
379
380         string source = versionDecl + "\n"
381                                         "precision highp " + shaderImageTypeStr + ";\n"
382                                         "\n"
383                                         "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
384                                         "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
385                                         "layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
386                                         "\n"
387                                         "void main (void)\n"
388                                         "{\n"
389                                         "       int gx = int(gl_GlobalInvocationID.x);\n"
390                                         "       int gy = int(gl_GlobalInvocationID.y);\n"
391                                         "       int gz = int(gl_GlobalInvocationID.z);\n"
392                                         "       imageStore(u_intermValuesImage, " + invocationCoord + ", " + colorVecTypeName + "(" + atomicInvocation + "));\n"
393                                         "}\n";
394
395         sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
396 }
397
398 class BinaryAtomicInstanceBase : public vkt::TestInstance
399 {
400 public:
401
402                                                                 BinaryAtomicInstanceBase (Context&                                              context,
403                                                                                                                   const string&                                 name,
404                                                                                                                   const ImageType                               imageType,
405                                                                                                                   const tcu::UVec3&                             imageSize,
406                                                                                                                   const TextureFormat&                  format,
407                                                                                                                   const AtomicOperation                 operation);
408
409         tcu::TestStatus                         iterate                                  (void);
410
411         virtual deUint32                        getOutputBufferSize              (void) const = 0;
412
413         virtual void                            prepareResources                 (void) = 0;
414         virtual void                            prepareDescriptors               (void) = 0;
415
416         virtual void                            commandsBeforeCompute    (const VkCommandBuffer                 cmdBuffer) const = 0;
417         virtual void                            commandsAfterCompute     (const VkCommandBuffer                 cmdBuffer) const = 0;
418
419         virtual bool                            verifyResult                     (Allocation&                                   outputBufferAllocation) const = 0;
420
421 protected:
422         const string                            m_name;
423         const ImageType                         m_imageType;
424         const tcu::UVec3                        m_imageSize;
425         const TextureFormat                     m_format;
426         const AtomicOperation           m_operation;
427
428         de::MovePtr<Buffer>                     m_outputBuffer;
429         Move<VkDescriptorPool>          m_descriptorPool;
430         Move<VkDescriptorSetLayout>     m_descriptorSetLayout;
431         Move<VkDescriptorSet>           m_descriptorSet;
432         de::MovePtr<Image>                      m_resultImage;
433         Move<VkImageView>                       m_resultImageView;
434 };
435
436 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context&                            context,
437                                                                                                         const string&                   name,
438                                                                                                         const ImageType                 imageType,
439                                                                                                         const tcu::UVec3&               imageSize,
440                                                                                                         const TextureFormat&    format,
441                                                                                                         const AtomicOperation   operation)
442         : vkt::TestInstance     (context)
443         , m_name                        (name)
444         , m_imageType           (imageType)
445         , m_imageSize           (imageSize)
446         , m_format                      (format)
447         , m_operation           (operation)
448 {
449 }
450
451 tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
452 {
453         const VkDevice                  device                          = m_context.getDevice();
454         const DeviceInterface&  deviceInterface         = m_context.getDeviceInterface();
455         const VkQueue                   queue                           = m_context.getUniversalQueue();
456         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
457         Allocator&                              allocator                       = m_context.getDefaultAllocator();
458         const VkDeviceSize              imageSizeInBytes        = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
459         const VkDeviceSize              outBuffSizeInBytes      = getOutputBufferSize();
460
461         const VkImageCreateInfo imageParams     =
462         {
463                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                                    // VkStructureType                      sType;
464                 DE_NULL,                                                                                                // const void*                          pNext;
465                 (m_imageType == IMAGE_TYPE_CUBE ||
466                  m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
467                  (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
468                  (VkImageCreateFlags)0u),                                                               // VkImageCreateFlags           flags;
469                 mapImageType(m_imageType),                                                              // VkImageType                          imageType;
470                 mapTextureFormat(m_format),                                                             // VkFormat                                     format;
471                 makeExtent3D(getLayerSize(m_imageType, m_imageSize)),   // VkExtent3D                           extent;
472                 1u,                                                                                                             // deUint32                                     mipLevels;
473                 getNumLayers(m_imageType, m_imageSize),                                 // deUint32                                     arrayLayers;
474                 VK_SAMPLE_COUNT_1_BIT,                                                                  // VkSampleCountFlagBits        samples;
475                 VK_IMAGE_TILING_OPTIMAL,                                                                // VkImageTiling                        tiling;
476                 VK_IMAGE_USAGE_STORAGE_BIT |
477                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
478                 VK_IMAGE_USAGE_TRANSFER_DST_BIT,                                                // VkImageUsageFlags            usage;
479                 VK_SHARING_MODE_EXCLUSIVE,                                                              // VkSharingMode                        sharingMode;
480                 0u,                                                                                                             // deUint32                                     queueFamilyIndexCount;
481                 DE_NULL,                                                                                                // const deUint32*                      pQueueFamilyIndices;
482                 VK_IMAGE_LAYOUT_UNDEFINED,                                                              // VkImageLayout                        initialLayout;
483         };
484
485         //Create the image that is going to store results of atomic operations
486         m_resultImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));
487
488         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
489
490         m_resultImageView = makeImageView(deviceInterface, device, m_resultImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
491
492         //Prepare the buffer with the initial data for the image
493         const Buffer inputBuffer(deviceInterface, device, allocator, makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
494
495         Allocation& inputBufferAllocation = inputBuffer.getAllocation();
496
497         //Prepare the initial data for the image
498         const tcu::IVec4 initialValue(getOperationInitialValue(m_operation));
499
500         tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
501         tcu::PixelBufferAccess inputPixelBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), inputBufferAllocation.getHostPtr());
502
503         for (deUint32 z = 0; z < gridSize.z(); z++)
504         for (deUint32 y = 0; y < gridSize.y(); y++)
505         for (deUint32 x = 0; x < gridSize.x(); x++)
506         {
507                 inputPixelBuffer.setPixel(initialValue, x, y, z);
508         }
509
510         flushMappedMemoryRange(deviceInterface, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), imageSizeInBytes);
511
512         // Create a buffer to store shader output copied from result image
513         m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface, device, allocator, makeBufferCreateInfo(outBuffSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
514
515         prepareResources();
516
517         prepareDescriptors();
518
519         // Create pipeline
520         const Unique<VkShaderModule>    shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
521         const Unique<VkPipelineLayout>  pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
522         const Unique<VkPipeline>                pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
523
524         // Create command buffer
525         const Unique<VkCommandPool>             cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
526         const Unique<VkCommandBuffer>   cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
527
528         beginCommandBuffer(deviceInterface, *cmdBuffer);
529
530         deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
531         deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
532
533         const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier     =
534                 makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT,
535                                                                 VK_ACCESS_TRANSFER_READ_BIT,
536                                                                 *inputBuffer,
537                                                                 0ull,
538                                                                 imageSizeInBytes);
539
540         const VkImageMemoryBarrier      resultImagePreCopyBarrier =
541                 makeImageMemoryBarrier( 0u,
542                                                                 VK_ACCESS_TRANSFER_WRITE_BIT,
543                                                                 VK_IMAGE_LAYOUT_UNDEFINED,
544                                                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
545                                                                 m_resultImage->get(),
546                                                                 subresourceRange);
547
548         deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 1u, &inputBufferPostHostWriteBarrier, 1u, &resultImagePreCopyBarrier);
549
550         const VkBufferImageCopy         bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));
551
552         deviceInterface.cmdCopyBufferToImage(*cmdBuffer, *inputBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &bufferImageCopyParams);
553
554         const VkImageMemoryBarrier      resultImagePostCopyBarrier      =
555                 makeImageMemoryBarrier( VK_ACCESS_TRANSFER_WRITE_BIT,
556                                                                 VK_ACCESS_SHADER_READ_BIT,
557                                                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
558                                                                 VK_IMAGE_LAYOUT_GENERAL,
559                                                                 m_resultImage->get(),
560                                                                 subresourceRange);
561
562         deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostCopyBarrier);
563
564         commandsBeforeCompute(*cmdBuffer);
565
566         deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
567
568         commandsAfterCompute(*cmdBuffer);
569
570         const VkBufferMemoryBarrier     outputBufferPreHostReadBarrier
571                 = makeBufferMemoryBarrier(      VK_ACCESS_TRANSFER_WRITE_BIT,
572                                                                         VK_ACCESS_HOST_READ_BIT,
573                                                                         m_outputBuffer->get(),
574                                                                         0ull,
575                                                                         outBuffSizeInBytes);
576
577         deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, 0u, DE_NULL, 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
578
579         endCommandBuffer(deviceInterface, *cmdBuffer);
580
581         submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer);
582
583         Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();
584
585         invalidateMappedMemoryRange(deviceInterface, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outBuffSizeInBytes);
586
587         if (verifyResult(outputBufferAllocation))
588                 return tcu::TestStatus::pass("Comparison succeeded");
589         else
590                 return tcu::TestStatus::fail("Comparison failed");
591 }
592
593 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
594 {
595 public:
596
597                                                 BinaryAtomicEndResultInstance  (Context&                                context,
598                                                                                                                 const string&                   name,
599                                                                                                                 const ImageType                 imageType,
600                                                                                                                 const tcu::UVec3&               imageSize,
601                                                                                                                 const TextureFormat&    format,
602                                                                                                                 const AtomicOperation   operation)
603                                                         : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}
604
605         virtual deUint32        getOutputBufferSize                        (void) const;
606
607         virtual void            prepareResources                           (void) {}
608         virtual void            prepareDescriptors                         (void);
609
610         virtual void            commandsBeforeCompute              (const VkCommandBuffer) const {}
611         virtual void            commandsAfterCompute               (const VkCommandBuffer       cmdBuffer) const;
612
613         virtual bool            verifyResult                               (Allocation&                         outputBufferAllocation) const;
614 };
615
616 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
617 {
618         return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
619 }
620
621 void BinaryAtomicEndResultInstance::prepareDescriptors (void)
622 {
623         const VkDevice                  device                  = m_context.getDevice();
624         const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
625
626         m_descriptorSetLayout =
627                 DescriptorSetLayoutBuilder()
628                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
629                 .build(deviceInterface, device);
630
631         m_descriptorPool =
632                 DescriptorPoolBuilder()
633                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
634                 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
635
636         m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
637
638         const VkDescriptorImageInfo     descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
639
640         DescriptorSetUpdateBuilder()
641                 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
642                 .update(deviceInterface, device);
643 }
644
645 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
646 {
647         const DeviceInterface&                  deviceInterface         = m_context.getDeviceInterface();
648         const VkImageSubresourceRange   subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
649
650         const VkImageMemoryBarrier      resultImagePostDispatchBarrier =
651                 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
652                                                                 VK_ACCESS_TRANSFER_READ_BIT,
653                                                                 VK_IMAGE_LAYOUT_GENERAL,
654                                                                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
655                                                                 m_resultImage->get(),
656                                                                 subresourceRange);
657
658         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostDispatchBarrier);
659
660         const VkBufferImageCopy         bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));
661
662         deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
663 }
664
665 bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation) const
666 {
667         const UVec3     gridSize                        = getShaderGridSize(m_imageType, m_imageSize);
668         const IVec3 extendedGridSize    = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
669
670         tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
671
672         for (deInt32 z = 0; z < resultBuffer.getDepth();  z++)
673         for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
674         for (deInt32 x = 0; x < resultBuffer.getWidth();  x++)
675         {
676                 deInt32 resultValue = resultBuffer.getPixelInt(x, y, z).x();
677
678                 if (isOrderIndependentAtomicOperation(m_operation))
679                 {
680                         deInt32 reference = getOperationInitialValue(m_operation);
681
682                         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
683                         {
684                                 const IVec3 gid(x + i*gridSize.x(), y, z);
685                                 reference = computeBinaryAtomicOperationResult(m_operation, reference, getAtomicFuncArgument(m_operation, gid, extendedGridSize));
686                         }
687
688                         if (resultValue != reference)
689                                 return false;
690                 }
691                 else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
692                 {
693                         // Check if the end result equals one of the atomic args.
694                         bool matchFound = false;
695
696                         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
697                         {
698                                 const IVec3 gid(x + i*gridSize.x(), y, z);
699                                 matchFound = (resultValue == getAtomicFuncArgument(m_operation, gid, extendedGridSize));
700                         }
701
702                         if (!matchFound)
703                                 return false;
704                 }
705                 else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
706                 {
707                         // Check if the end result equals one of the atomic args.
708                         bool matchFound = false;
709
710                         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
711                         {
712                                 const IVec3 gid(x + i*gridSize.x(), y, z);
713                                 matchFound = (resultValue == getAtomicFuncArgument(m_operation, gid, extendedGridSize));
714                         }
715
716                         if (!matchFound)
717                                 return false;
718                 }
719                 else
720                         DE_ASSERT(false);
721         }
722         return true;
723 }
724
725 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
726 {
727         return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
728 }
729
730 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
731 {
732 public:
733
734                                                 BinaryAtomicIntermValuesInstance   (Context&                            context,
735                                                                                                                         const string&                   name,
736                                                                                                                         const ImageType                 imageType,
737                                                                                                                         const tcu::UVec3&               imageSize,
738                                                                                                                         const TextureFormat&    format,
739                                                                                                                         const AtomicOperation   operation)
740                                                         : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}
741
742         virtual deUint32        getOutputBufferSize                                (void) const;
743
744         virtual void            prepareResources                                   (void);
745         virtual void            prepareDescriptors                                 (void);
746
747         virtual void            commandsBeforeCompute                      (const VkCommandBuffer       cmdBuffer) const;
748         virtual void            commandsAfterCompute                       (const VkCommandBuffer       cmdBuffer) const;
749
750         virtual bool            verifyResult                                       (Allocation&                         outputBufferAllocation) const;
751
752 protected:
753
754         bool                            verifyRecursive                                    (const deInt32                       index,
755                                                                                                                         const deInt32                   valueSoFar,
756                                                                                                                         bool                                    argsUsed[NUM_INVOCATIONS_PER_PIXEL],
757                                                                                                                         const deInt32                   atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
758                                                                                                                         const deInt32                   resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
759         de::MovePtr<Image>      m_intermResultsImage;
760         Move<VkImageView>       m_intermResultsImageView;
761 };
762
763 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
764 {
765         return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
766 }
767
768 void BinaryAtomicIntermValuesInstance::prepareResources (void)
769 {
770         const VkDevice                  device                  = m_context.getDevice();
771         const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
772         Allocator&                              allocator               = m_context.getDefaultAllocator();
773
774         const UVec3 layerSize                   = getLayerSize(m_imageType, m_imageSize);
775         const bool  isCubeBasedImage    = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
776         const UVec3 extendedLayerSize   = isCubeBasedImage      ? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
777                                                                                                                 : UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
778
779         const VkImageCreateInfo imageParams =
780         {
781                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,            // VkStructureType                      sType;
782                 DE_NULL,                                                                        // const void*                          pNext;
783                 (m_imageType == IMAGE_TYPE_CUBE ||
784                  m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
785                  (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
786                  (VkImageCreateFlags)0u),                                       // VkImageCreateFlags           flags;
787                 mapImageType(m_imageType),                                      // VkImageType                          imageType;
788                 mapTextureFormat(m_format),                                     // VkFormat                                     format;
789                 makeExtent3D(extendedLayerSize),                        // VkExtent3D                           extent;
790                 1u,                                                                                     // deUint32                                     mipLevels;
791                 getNumLayers(m_imageType, m_imageSize),         // deUint32                                     arrayLayers;
792                 VK_SAMPLE_COUNT_1_BIT,                                          // VkSampleCountFlagBits        samples;
793                 VK_IMAGE_TILING_OPTIMAL,                                        // VkImageTiling                        tiling;
794                 VK_IMAGE_USAGE_STORAGE_BIT |
795                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
796                 VK_SHARING_MODE_EXCLUSIVE,                                      // VkSharingMode                        sharingMode;
797                 0u,                                                                                     // deUint32                                     queueFamilyIndexCount;
798                 DE_NULL,                                                                        // const deUint32*                      pQueueFamilyIndices;
799                 VK_IMAGE_LAYOUT_UNDEFINED,                                      // VkImageLayout                        initialLayout;
800         };
801
802         m_intermResultsImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));
803
804         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
805
806         m_intermResultsImageView = makeImageView(deviceInterface, device, m_intermResultsImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
807 }
808
809 void BinaryAtomicIntermValuesInstance::prepareDescriptors (void)
810 {
811         const VkDevice                  device                  = m_context.getDevice();
812         const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
813
814         m_descriptorSetLayout =
815                 DescriptorSetLayoutBuilder()
816                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
817                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
818                 .build(deviceInterface, device);
819
820         m_descriptorPool =
821                 DescriptorPoolBuilder()
822                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u)
823                 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
824
825         m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
826
827         const VkDescriptorImageInfo     descResultImageInfo                     = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
828         const VkDescriptorImageInfo     descIntermResultsImageInfo      = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
829
830         DescriptorSetUpdateBuilder()
831                 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
832                 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descIntermResultsImageInfo)
833                 .update(deviceInterface, device);
834 }
835
836 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
837 {
838         const DeviceInterface&                  deviceInterface         = m_context.getDeviceInterface();
839         const VkImageSubresourceRange   subresourceRange        = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
840
841         const VkImageMemoryBarrier      imagePreDispatchBarrier =
842                 makeImageMemoryBarrier( 0u,
843                                                                 VK_ACCESS_SHADER_WRITE_BIT,
844                                                                 VK_IMAGE_LAYOUT_UNDEFINED,
845                                                                 VK_IMAGE_LAYOUT_GENERAL,
846                                                                 m_intermResultsImage->get(),
847                                                                 subresourceRange);
848
849         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
850 }
851
852 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
853 {
854         const DeviceInterface&                  deviceInterface         = m_context.getDeviceInterface();
855         const VkImageSubresourceRange   subresourceRange        = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
856
857         const VkImageMemoryBarrier      imagePostDispatchBarrier =
858                 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
859                                                                 VK_ACCESS_TRANSFER_READ_BIT,
860                                                                 VK_IMAGE_LAYOUT_GENERAL,
861                                                                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
862                                                                 m_intermResultsImage->get(),
863                                                                 subresourceRange);
864
865         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
866
867         const UVec3                                     layerSize                               = getLayerSize(m_imageType, m_imageSize);
868         const UVec3                                     extendedLayerSize               = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
869         const VkBufferImageCopy         bufferImageCopyParams   = makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
870
871         deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
872 }
873
874 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation&        outputBufferAllocation) const
875 {
876         const UVec3     gridSize                 = getShaderGridSize(m_imageType, m_imageSize);
877         const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
878
879         tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
880
881         for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
882         for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
883         for (deUint32 x = 0; x < gridSize.x(); x++)
884         {
885                 deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL];
886                 deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
887                 bool    argsUsed[NUM_INVOCATIONS_PER_PIXEL];
888
889                 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
890                 {
891                         IVec3 gid(x + i*gridSize.x(), y, z);
892
893                         resultValues[i] = resultBuffer.getPixelInt(gid.x(), gid.y(), gid.z()).x();
894                         atomicArgs[i]   = getAtomicFuncArgument(m_operation, gid, extendedGridSize);
895                         argsUsed[i]             = false;
896                 }
897
898                 // Verify that the return values form a valid sequence.
899                 if (!verifyRecursive(0, getOperationInitialValue(m_operation), argsUsed, atomicArgs, resultValues))
900                 {
901                         return false;
902                 }
903         }
904
905         return true;
906 }
907
908 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32   index,
909                                                                                                                 const deInt32   valueSoFar,
910                                                                                                                 bool                    argsUsed[NUM_INVOCATIONS_PER_PIXEL],
911                                                                                                                 const deInt32   atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
912                                                                                                                 const deInt32   resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
913 {
914         if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
915                 return true;
916
917         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
918         {
919                 if (!argsUsed[i] && resultValues[i] == valueSoFar)
920                 {
921                         argsUsed[i] = true;
922
923                         if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
924                         {
925                                 return true;
926                         }
927
928                         argsUsed[i] = false;
929                 }
930         }
931
932         return false;
933 }
934
935 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
936 {
937         return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
938 }
939
940 } // anonymous ns
941
942 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
943 {
944         de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
945
946         struct ImageParams
947         {
948                 ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
949                         : m_imageType   (imageType)
950                         , m_imageSize   (imageSize)
951                 {
952                 }
953                 const ImageType         m_imageType;
954                 const tcu::UVec3        m_imageSize;
955         };
956
957         static const ImageParams imageParamsArray[] =
958         {
959                 ImageParams(IMAGE_TYPE_1D,                      tcu::UVec3(64u, 1u, 1u)),
960                 ImageParams(IMAGE_TYPE_1D_ARRAY,        tcu::UVec3(64u, 1u, 8u)),
961                 ImageParams(IMAGE_TYPE_2D,                      tcu::UVec3(64u, 64u, 1u)),
962                 ImageParams(IMAGE_TYPE_2D_ARRAY,        tcu::UVec3(64u, 64u, 8u)),
963                 ImageParams(IMAGE_TYPE_3D,                      tcu::UVec3(64u, 64u, 8u)),
964                 ImageParams(IMAGE_TYPE_CUBE,            tcu::UVec3(64u, 64u, 1u)),
965                 ImageParams(IMAGE_TYPE_CUBE_ARRAY,      tcu::UVec3(64u, 64u, 2u))
966         };
967
968         static const tcu::TextureFormat formats[] =
969         {
970                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
971                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32)
972         };
973
974         for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
975         {
976                 const AtomicOperation operation = (AtomicOperation)operationI;
977
978                 de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
979
980                 for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
981                 {
982                         const ImageType  imageType = imageParamsArray[imageTypeNdx].m_imageType;
983                         const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
984
985                         de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
986
987                         for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
988                         {
989                                 const TextureFormat&    format          = formats[formatNdx];
990                                 const std::string               formatName      = getShaderImageFormatQualifier(format);
991
992                                 //!< Atomic case checks the end result of the operations, and not the intermediate return values
993                                 const string caseEndResult = formatName + "_end_result";
994                                 imageTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
995
996                                 //!< Atomic case checks the return values of the atomic function and not the end result.
997                                 const string caseIntermValues = formatName + "_intermediate_values";
998                                 imageTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
999                         }
1000
1001                         operationGroup->addChild(imageTypeGroup.release());
1002                 }
1003
1004                 imageAtomicOperationsTests->addChild(operationGroup.release());
1005         }
1006
1007         return imageAtomicOperationsTests.release();
1008 }
1009
1010 } // image
1011 } // vkt