Test behaviour of color write enable with colorWriteMask
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / image / vktImageAtomicOperationTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file  vktImageAtomicOperationTests.cpp
21  * \brief Image atomic operation tests
22  *//*--------------------------------------------------------------------*/
23
24 #include "vktImageAtomicOperationTests.hpp"
25 #include "vktImageAtomicSpirvShaders.hpp"
26
27 #include "deUniquePtr.hpp"
28 #include "deStringUtil.hpp"
29 #include "deSTLUtil.hpp"
30
31 #include "vktTestCaseUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkQueryUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vktImageTestsUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkRef.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkTypeUtil.hpp"
41 #include "vkCmdUtil.hpp"
42 #include "vkObjUtil.hpp"
43
44 #include "tcuTextureUtil.hpp"
45 #include "tcuTexture.hpp"
46 #include "tcuVectorType.hpp"
47 #include "tcuStringTemplate.hpp"
48
49 namespace vkt
50 {
51 namespace image
52 {
53 namespace
54 {
55
56 using namespace vk;
57 using namespace std;
58 using de::toString;
59
60 using tcu::TextureFormat;
61 using tcu::IVec2;
62 using tcu::IVec3;
63 using tcu::UVec3;
64 using tcu::Vec4;
65 using tcu::IVec4;
66 using tcu::UVec4;
67 using tcu::CubeFace;
68 using tcu::Texture1D;
69 using tcu::Texture2D;
70 using tcu::Texture3D;
71 using tcu::Texture2DArray;
72 using tcu::TextureCube;
73 using tcu::PixelBufferAccess;
74 using tcu::ConstPixelBufferAccess;
75 using tcu::Vector;
76 using tcu::TestContext;
77
//! Compile-time constant; presumably the number of shader invocations that
//! operate on each pixel (its uses are outside this part of the file).
enum { NUM_INVOCATIONS_PER_PIXEL = 5u };
82
//! Image atomic operations covered by these tests.
//! Values are consecutive starting at zero; ATOMIC_OPERATION_LAST is the number of operations.
enum AtomicOperation
{
	ATOMIC_OPERATION_ADD				= 0,
	ATOMIC_OPERATION_SUB				= 1,
	ATOMIC_OPERATION_INC				= 2,
	ATOMIC_OPERATION_DEC				= 3,
	ATOMIC_OPERATION_MIN				= 4,
	ATOMIC_OPERATION_MAX				= 5,
	ATOMIC_OPERATION_AND				= 6,
	ATOMIC_OPERATION_OR					= 7,
	ATOMIC_OPERATION_XOR				= 8,
	ATOMIC_OPERATION_EXCHANGE			= 9,
	ATOMIC_OPERATION_COMPARE_EXCHANGE	= 10,

	ATOMIC_OPERATION_LAST				= 11	//!< Sentinel: one past the last valid operation.
};
99
//! How the image is read back in the shader.
//! NOTE(review): SPARSE presumably selects the residency-aware read shader - confirm against the callers.
enum class ShaderReadType
{
	NORMAL	= 0,
	SPARSE	= 1,
};
105
//! Kind of memory backing used for the test image.
//! commonCheckSupport() requires the sparse binding/residency device features when SPARSE is selected.
enum class ImageBackingType
{
	NORMAL	= 0,
	SPARSE	= 1,
};
111
112 static string getCoordStr (const ImageType              imageType,
113                                                    const std::string&   x,
114                                                    const std::string&   y,
115                                                    const std::string&   z)
116 {
117         switch (imageType)
118         {
119                 case IMAGE_TYPE_1D:
120                 case IMAGE_TYPE_BUFFER:
121                         return x;
122                 case IMAGE_TYPE_1D_ARRAY:
123                 case IMAGE_TYPE_2D:
124                         return string("ivec2(" + x + "," + y + ")");
125                 case IMAGE_TYPE_2D_ARRAY:
126                 case IMAGE_TYPE_3D:
127                 case IMAGE_TYPE_CUBE:
128                 case IMAGE_TYPE_CUBE_ARRAY:
129                         return string("ivec3(" + x + "," + y + "," + z + ")");
130                 default:
131                         DE_ASSERT(false);
132                         return DE_NULL;
133         }
134 }
135
136 static string getComponentTypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
137 {
138         DE_ASSERT(intFormat || uintFormat || floatFormat);
139
140         const bool is64 = (componentWidth == 64);
141
142         if (intFormat)
143                 return (is64 ? "int64_t" : "int");
144         if (uintFormat)
145                 return (is64 ? "uint64_t" : "uint");
146         if (floatFormat)
147                 return (is64 ? "double" : "float");
148
149         return "";
150 }
151
152 static string getVec4TypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
153 {
154         DE_ASSERT(intFormat || uintFormat || floatFormat);
155
156         const bool is64 = (componentWidth == 64);
157
158         if (intFormat)
159                 return (is64 ? "i64vec4" : "ivec4");
160         if (uintFormat)
161                 return (is64 ? "u64vec4" : "uvec4");
162         if (floatFormat)
163                 return (is64 ? "f64vec4" : "vec4");
164
165         return "";
166 }
167
168 static string getAtomicFuncArgumentShaderStr (const AtomicOperation     op,
169                                                                                           const string&                 x,
170                                                                                           const string&                 y,
171                                                                                           const string&                 z,
172                                                                                           const IVec3&                  gridSize)
173 {
174         switch (op)
175         {
176                 case ATOMIC_OPERATION_ADD:
177                 case ATOMIC_OPERATION_AND:
178                 case ATOMIC_OPERATION_OR:
179                 case ATOMIC_OPERATION_XOR:
180                         return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
181                 case ATOMIC_OPERATION_MIN:
182                 case ATOMIC_OPERATION_MAX:
183                         // multiply by (1-2*(value % 2) to make half of the data negative
184                         // this will result in generating large numbers for uint formats
185                         return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
186                 case ATOMIC_OPERATION_EXCHANGE:
187                 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
188                         return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
189                 default:
190                         DE_ASSERT(false);
191                         return DE_NULL;
192         }
193 }
194
195 static string getAtomicOperationCaseName (const AtomicOperation op)
196 {
197         switch (op)
198         {
199                 case ATOMIC_OPERATION_ADD:                              return string("add");
200                 case ATOMIC_OPERATION_SUB:                              return string("sub");
201                 case ATOMIC_OPERATION_INC:                              return string("inc");
202                 case ATOMIC_OPERATION_DEC:                              return string("dec");
203                 case ATOMIC_OPERATION_MIN:                              return string("min");
204                 case ATOMIC_OPERATION_MAX:                              return string("max");
205                 case ATOMIC_OPERATION_AND:                              return string("and");
206                 case ATOMIC_OPERATION_OR:                               return string("or");
207                 case ATOMIC_OPERATION_XOR:                              return string("xor");
208                 case ATOMIC_OPERATION_EXCHANGE:                 return string("exchange");
209                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("compare_exchange");
210                 default:
211                         DE_ASSERT(false);
212                         return DE_NULL;
213         }
214 }
215
216 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
217 {
218         switch (op)
219         {
220                 case ATOMIC_OPERATION_ADD:                              return string("imageAtomicAdd");
221                 case ATOMIC_OPERATION_MIN:                              return string("imageAtomicMin");
222                 case ATOMIC_OPERATION_MAX:                              return string("imageAtomicMax");
223                 case ATOMIC_OPERATION_AND:                              return string("imageAtomicAnd");
224                 case ATOMIC_OPERATION_OR:                               return string("imageAtomicOr");
225                 case ATOMIC_OPERATION_XOR:                              return string("imageAtomicXor");
226                 case ATOMIC_OPERATION_EXCHANGE:                 return string("imageAtomicExchange");
227                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("imageAtomicCompSwap");
228                 default:
229                         DE_ASSERT(false);
230                         return DE_NULL;
231         }
232 }
233
234 template <typename T>
235 T getOperationInitialValue (const AtomicOperation op)
236 {
237         switch (op)
238         {
239                 // \note 18 is just an arbitrary small nonzero value.
240                 case ATOMIC_OPERATION_ADD:                              return 18;
241                 case ATOMIC_OPERATION_INC:                              return 18;
242                 case ATOMIC_OPERATION_SUB:                              return (1 << 24) - 1;
243                 case ATOMIC_OPERATION_DEC:                              return (1 << 24) - 1;
244                 case ATOMIC_OPERATION_MIN:                              return (1 << 15) - 1;
245                 case ATOMIC_OPERATION_MAX:                              return 18;
246                 case ATOMIC_OPERATION_AND:                              return (1 << 15) - 1;
247                 case ATOMIC_OPERATION_OR:                               return 18;
248                 case ATOMIC_OPERATION_XOR:                              return 18;
249                 case ATOMIC_OPERATION_EXCHANGE:                 return 18;
250                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 18;
251                 default:
252                         DE_ASSERT(false);
253                         return 0xFFFFFFFF;
254         }
255 }
256
257 template <>
258 deInt64 getOperationInitialValue<deInt64>(const AtomicOperation op)
259 {
260         switch (op)
261         {
262                 // \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
263                 case ATOMIC_OPERATION_ADD:                              return 0x000000BEFFFFFF18;
264                 case ATOMIC_OPERATION_INC:                              return 0x000000BEFFFFFF18;
265                 case ATOMIC_OPERATION_SUB:                              return (1ull << 56) - 1;
266                 case ATOMIC_OPERATION_DEC:                              return (1ull << 56) - 1;
267                 case ATOMIC_OPERATION_MIN:                              return (1ull << 47) - 1;
268                 case ATOMIC_OPERATION_MAX:                              return 0x000000BEFFFFFF18;
269                 case ATOMIC_OPERATION_AND:                              return (1ull << 47) - 1;
270                 case ATOMIC_OPERATION_OR:                               return 0x000000BEFFFFFF18;
271                 case ATOMIC_OPERATION_XOR:                              return 0x000000BEFFFFFF18;
272                 case ATOMIC_OPERATION_EXCHANGE:                 return 0x000000BEFFFFFF18;
273                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 0x000000BEFFFFFF18;
274                 default:
275                         DE_ASSERT(false);
276                         return 0xFFFFFFFFFFFFFFFF;
277         }
278 }
279
280 template <>
281 deUint64 getOperationInitialValue<deUint64>(const AtomicOperation op)
282 {
283         return (deUint64)getOperationInitialValue<deInt64>(op);
284 }
285
286
287 template <typename T>
288 static T getAtomicFuncArgument (const AtomicOperation   op,
289                                                                 const IVec3&                    invocationID,
290                                                                 const IVec3&                    gridSize)
291 {
292         const T x = static_cast<T>(invocationID.x());
293         const T y = static_cast<T>(invocationID.y());
294         const T z = static_cast<T>(invocationID.z());
295
296         switch (op)
297         {
298                 // \note Fall-throughs.
299                 case ATOMIC_OPERATION_ADD:
300                 case ATOMIC_OPERATION_SUB:
301                 case ATOMIC_OPERATION_AND:
302                 case ATOMIC_OPERATION_OR:
303                 case ATOMIC_OPERATION_XOR:
304                         return x*x + y*y + z*z;
305                 case ATOMIC_OPERATION_INC:
306                 case ATOMIC_OPERATION_DEC:
307                         return 1;
308                 case ATOMIC_OPERATION_MIN:
309                 case ATOMIC_OPERATION_MAX:
310                         // multiply half of the data by -1
311                         return (1-2*(x % 2))*(x*x + y*y + z*z);
312                 case ATOMIC_OPERATION_EXCHANGE:
313                 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
314                         return (z*static_cast<T>(gridSize.x()) + x)*static_cast<T>(gridSize.y()) + y;
315                 default:
316                         DE_ASSERT(false);
317                         return -1;
318         }
319 }
320
321 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
322 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
323 {
324         return  op == ATOMIC_OPERATION_ADD ||
325                         op == ATOMIC_OPERATION_SUB ||
326                         op == ATOMIC_OPERATION_INC ||
327                         op == ATOMIC_OPERATION_DEC ||
328                         op == ATOMIC_OPERATION_MIN ||
329                         op == ATOMIC_OPERATION_MAX ||
330                         op == ATOMIC_OPERATION_AND ||
331                         op == ATOMIC_OPERATION_OR ||
332                         op == ATOMIC_OPERATION_XOR;
333 }
334
335 //! Checks if the operation needs an SPIR-V shader.
336 static bool isSpirvAtomicOperation (const AtomicOperation op)
337 {
338         return  op == ATOMIC_OPERATION_SUB ||
339                         op == ATOMIC_OPERATION_INC ||
340                         op == ATOMIC_OPERATION_DEC;
341 }
342
343 //! Returns the SPIR-V assembler name of the given operation.
344 static std::string getSpirvAtomicOpName (const AtomicOperation op)
345 {
346         switch (op)
347         {
348         case ATOMIC_OPERATION_SUB:      return "OpAtomicISub";
349         case ATOMIC_OPERATION_INC:      return "OpAtomicIIncrement";
350         case ATOMIC_OPERATION_DEC:      return "OpAtomicIDecrement";
351         default:                                        break;
352         }
353
354         DE_ASSERT(false);
355         return "";
356 }
357
358 //! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
359 static bool isSpirvAtomicNoLastArgOp (const AtomicOperation op)
360 {
361         switch (op)
362         {
363         case ATOMIC_OPERATION_SUB:      return false;
364         case ATOMIC_OPERATION_INC:      // fallthrough
365         case ATOMIC_OPERATION_DEC:      return true;
366         default:                                        break;
367         }
368
369         DE_ASSERT(false);
370         return false;
371 }
372
373 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
374 template <typename T>
375 static T computeBinaryAtomicOperationResult (const AtomicOperation op, const T a, const T b)
376 {
377         switch (op)
378         {
379                 case ATOMIC_OPERATION_INC:                              // fallthrough.
380                 case ATOMIC_OPERATION_ADD:                              return a + b;
381                 case ATOMIC_OPERATION_DEC:                              // fallthrough.
382                 case ATOMIC_OPERATION_SUB:                              return a - b;
383                 case ATOMIC_OPERATION_MIN:                              return de::min(a, b);
384                 case ATOMIC_OPERATION_MAX:                              return de::max(a, b);
385                 case ATOMIC_OPERATION_AND:                              return a & b;
386                 case ATOMIC_OPERATION_OR:                               return a | b;
387                 case ATOMIC_OPERATION_XOR:                              return a ^ b;
388                 case ATOMIC_OPERATION_EXCHANGE:                 return b;
389                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
390                 default:
391                         DE_ASSERT(false);
392                         return -1;
393         }
394 }
395
396 VkImageUsageFlags getUsageFlags (bool useTransfer)
397 {
398         VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;
399
400         if (useTransfer)
401                 usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
402
403         return usageFlags;
404 }
405
406 void AddFillReadShader (SourceCollections&                      sourceCollections,
407                                                 const ImageType&                        imageType,
408                                                 const tcu::TextureFormat&       format,
409                                                 const string&                           componentType,
410                                                 const string&                           vec4Type)
411 {
412         const string    imageInCoord                    = getCoordStr(imageType, "gx", "gy", "gz");
413         const string    shaderImageFormatStr    = getShaderImageFormatQualifier(format);
414         const string    shaderImageTypeStr              = getShaderImageType(format, imageType);
415         const auto              componentWidth                  = getFormatComponentWidth(mapTextureFormat(format), 0u);
416         const string    extensions                              = ((componentWidth == 64u)
417                                                                                         ?       "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
418                                                                                                 "#extension GL_EXT_shader_image_int64 : require\n"
419                                                                                         :       "");
420
421
422         const string fillShader =       "#version 450\n"
423                                                                 + extensions +
424                                                                 "precision highp " + shaderImageTypeStr + ";\n"
425                                                                 "\n"
426                                                                 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
427                                                                 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
428                                                                 "\n"
429                                                                 "layout(std430, binding = 1) buffer inputBuffer\n"
430                                                                 "{\n"
431                                                                 "       "+ componentType + " data[];\n"
432                                                                 "} inBuffer;\n"
433                                                                 "\n"
434                                                                 "void main(void)\n"
435                                                                 "{\n"
436                                                                 "       int gx = int(gl_GlobalInvocationID.x);\n"
437                                                                 "       int gy = int(gl_GlobalInvocationID.y);\n"
438                                                                 "       int gz = int(gl_GlobalInvocationID.z);\n"
439                                                                 "       uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
440                                                                 "       imageStore(u_resultImage, " + imageInCoord + ", " + vec4Type + "(inBuffer.data[index]));\n"
441                                                                 "}\n";
442
443         const string readShader =       "#version 450\n"
444                                                                 + extensions +
445                                                                 "precision highp " + shaderImageTypeStr + ";\n"
446                                                                 "\n"
447                                                                 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
448                                                                 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
449                                                                 "\n"
450                                                                 "layout(std430, binding = 1) buffer outputBuffer\n"
451                                                                 "{\n"
452                                                                 "       " + componentType + " data[];\n"
453                                                                 "} outBuffer;\n"
454                                                                 "\n"
455                                                                 "void main(void)\n"
456                                                                 "{\n"
457                                                                 "       int gx = int(gl_GlobalInvocationID.x);\n"
458                                                                 "       int gy = int(gl_GlobalInvocationID.y);\n"
459                                                                 "       int gz = int(gl_GlobalInvocationID.z);\n"
460                                                                 "       uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
461                                                                 "       outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
462                                                                 "}\n";
463
464
465         if ((imageType != IMAGE_TYPE_1D) &&
466                 (imageType != IMAGE_TYPE_1D_ARRAY) &&
467                 (imageType != IMAGE_TYPE_BUFFER))
468         {
469                 const string readShaderResidency  = "#version 450\n"
470                                                                                         "#extension GL_ARB_sparse_texture2 : require\n"
471                                                                                         + extensions +
472                                                                                         "precision highp " + shaderImageTypeStr + ";\n"
473                                                                                         "\n"
474                                                                                         "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
475                                                                                         "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
476                                                                                         "\n"
477                                                                                         "layout(std430, binding = 1) buffer outputBuffer\n"
478                                                                                         "{\n"
479                                                                                         "       " + componentType + " data[];\n"
480                                                                                         "} outBuffer;\n"
481                                                                                         "\n"
482                                                                                         "void main(void)\n"
483                                                                                         "{\n"
484                                                                                         "       int gx = int(gl_GlobalInvocationID.x);\n"
485                                                                                         "       int gy = int(gl_GlobalInvocationID.y);\n"
486                                                                                         "       int gz = int(gl_GlobalInvocationID.z);\n"
487                                                                                         "       uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
488                                                                                         "       outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
489                                                                                         "       " + vec4Type + " sparseValue;\n"
490                                                                                         "       sparseImageLoadARB(u_resultImage, " + imageInCoord + ", sparseValue);\n"
491                                                                                         "       if (outBuffer.data[index] != sparseValue.x)\n"
492                                                                                         "               outBuffer.data[index] = " + vec4Type + "(1234).x;\n"
493                                                                                         "}\n";
494
495                 sourceCollections.glslSources.add("readShaderResidency") << glu::ComputeSource(readShaderResidency.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
496         }
497
498         sourceCollections.glslSources.add("fillShader") << glu::ComputeSource(fillShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
499         sourceCollections.glslSources.add("readShader") << glu::ComputeSource(readShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
500 }
501
502 //! Prepare the initial data for the image
503 static void initDataForImage (const VkDevice                    device,
504                                                           const DeviceInterface&        deviceInterface,
505                                                           const TextureFormat&          format,
506                                                           const AtomicOperation         operation,
507                                                           const tcu::UVec3&                     gridSize,
508                                                           Buffer&                                       buffer)
509 {
510         Allocation&                             bufferAllocation        = buffer.getAllocation();
511         const VkFormat                  imageFormat                     = mapTextureFormat(format);
512         tcu::PixelBufferAccess  pixelBuffer                     (format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());
513
514         if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
515         {
516                 const deInt64 initialValue(getOperationInitialValue<deInt64>(operation));
517
518                 for (deUint32 z = 0; z < gridSize.z(); z++)
519                 for (deUint32 y = 0; y < gridSize.y(); y++)
520                 for (deUint32 x = 0; x < gridSize.x(); x++)
521                 {
522                         *((deInt64*)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
523                 }
524         }
525         else
526         {
527                 const tcu::IVec4 initialValue(getOperationInitialValue<deInt32>(operation));
528
529                 for (deUint32 z = 0; z < gridSize.z(); z++)
530                 for (deUint32 y = 0; y < gridSize.y(); y++)
531                 for (deUint32 x = 0; x < gridSize.x(); x++)
532                 {
533                         pixelBuffer.setPixel(initialValue, x, y, z);
534                 }
535         }
536
537         flushAlloc(deviceInterface, device, bufferAllocation);
538 }
539
540 void commonCheckSupport (Context& context, const tcu::TextureFormat& tcuFormat, ImageType imageType, AtomicOperation operation, bool useTransfer, ShaderReadType readType, ImageBackingType backingType)
541 {
542         const VkFormat                          format                          = mapTextureFormat(tcuFormat);
543         const VkImageType                       vkImgType                       = mapImageType(imageType);
544         const VkFormatFeatureFlags      texelBufferSupport      = (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);
545         const VkFormatProperties        formatProperties        = getPhysicalDeviceFormatProperties(context.getInstanceInterface(),
546                                                                                                                                                                                 context.getPhysicalDevice(), format);
547
548         if ((imageType == IMAGE_TYPE_BUFFER) &&
549                 ((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
550                 TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");
551
552         if (imageType == IMAGE_TYPE_CUBE_ARRAY)
553                 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);
554
555         if (backingType == ImageBackingType::SPARSE)
556         {
557                 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
558
559                 switch (vkImgType)
560                 {
561                 case VK_IMAGE_TYPE_2D:  context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D); break;
562                 case VK_IMAGE_TYPE_3D:  context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D); break;
563                 default:                                DE_ASSERT(false); break;
564                 }
565
566                 if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format, vkImgType, VK_SAMPLE_COUNT_1_BIT, getUsageFlags(useTransfer), VK_IMAGE_TILING_OPTIMAL))
567                         TCU_THROW(NotSupportedError, "Format does not support sparse images");
568         }
569
570         if (isFloatFormat(format))
571         {
572                 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
573
574                 const VkFormatFeatureFlags      requiredFeatures        = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
575                 const auto&                                     atomicFloatFeatures     = context.getShaderAtomicFloatFeaturesEXT();
576
577                 if (!atomicFloatFeatures.shaderImageFloat32Atomics)
578                         TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");
579
580                 if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
581                         TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");
582
583                 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
584                         TCU_FAIL("Required format feature bits not supported");
585
586                 if (backingType == ImageBackingType::SPARSE)
587                 {
588                         if (!atomicFloatFeatures.sparseImageFloat32Atomics)
589                                 TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");
590
591                         if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
592                                 TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
593                 }
594
595         }
596         else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
597         {
598                 context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");
599
600                 const VkFormatFeatureFlags      requiredFeatures        = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
601                 const auto&                                     atomicInt64Features     = context.getShaderImageAtomicInt64FeaturesEXT();
602
603                 if (!atomicInt64Features.shaderImageInt64Atomics)
604                         TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");
605
606                 if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
607                         TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");
608
609                 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
610                         TCU_FAIL("Mandatory format features not supported");
611         }
612
613         if (useTransfer)
614         {
615                 const VkFormatFeatureFlags transferFeatures = (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
616                 if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
617                         TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
618         }
619
620         if (readType == ShaderReadType::SPARSE)
621         {
622                 DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
623                 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
624         }
625 }
626
627 class BinaryAtomicEndResultCase : public vkt::TestCase
628 {
629 public:
630                                                                 BinaryAtomicEndResultCase       (tcu::TestContext&                      testCtx,
631                                                                                                                          const string&                          name,
632                                                                                                                          const string&                          description,
633                                                                                                                          const ImageType                        imageType,
634                                                                                                                          const tcu::UVec3&                      imageSize,
635                                                                                                                          const tcu::TextureFormat&      format,
636                                                                                                                          const AtomicOperation          operation,
637                                                                                                                          const bool                                     useTransfer,
638                                                                                                                          const ShaderReadType           shaderReadType,
639                                                                                                                          const ImageBackingType         backingType,
640                                                                                                                          const glu::GLSLVersion         glslVersion);
641
642         void                                            initPrograms                            (SourceCollections&                     sourceCollections) const;
643         TestInstance*                           createInstance                          (Context&                                       context) const;
644         virtual void                            checkSupport                            (Context&                                       context) const;
645
646 private:
647         const ImageType                         m_imageType;
648         const tcu::UVec3                        m_imageSize;
649         const tcu::TextureFormat        m_format;
650         const AtomicOperation           m_operation;
651         const bool                                      m_useTransfer;
652         const ShaderReadType            m_readType;
653         const ImageBackingType          m_backingType;
654         const glu::GLSLVersion          m_glslVersion;
655 };
656
657 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext&                 testCtx,
658                                                                                                           const string&                         name,
659                                                                                                           const string&                         description,
660                                                                                                           const ImageType                       imageType,
661                                                                                                           const tcu::UVec3&                     imageSize,
662                                                                                                           const tcu::TextureFormat&     format,
663                                                                                                           const AtomicOperation         operation,
664                                                                                                           const bool                            useTransfer,
665                                                                                                           const ShaderReadType          shaderReadType,
666                                                                                                           const ImageBackingType        backingType,
667                                                                                                           const glu::GLSLVersion        glslVersion)
668         : TestCase              (testCtx, name, description)
669         , m_imageType   (imageType)
670         , m_imageSize   (imageSize)
671         , m_format              (format)
672         , m_operation   (operation)
673         , m_useTransfer (useTransfer)
674         , m_readType    (shaderReadType)
675         , m_backingType (backingType)
676         , m_glslVersion (glslVersion)
677 {
678 }
679
680 void BinaryAtomicEndResultCase::checkSupport (Context& context) const
681 {
682         commonCheckSupport(context, m_format, m_imageType, m_operation, m_useTransfer, m_readType, m_backingType);
683 }
684
685 void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
686 {
687         const VkFormat  imageFormat             = mapTextureFormat(m_format);
688         const deUint32  componentWidth  = getFormatComponentWidth(imageFormat, 0);
689         const bool              intFormat               = isIntFormat(imageFormat);
690         const bool              uintFormat              = isUintFormat(imageFormat);
691         const bool              floatFormat             = isFloatFormat(imageFormat);
692         const string    type                    = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
693         const string    vec4Type                = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
694
695         AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
696
697         if (isSpirvAtomicOperation(m_operation))
698         {
699                 const CaseVariant                                       caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
700                 const tcu::StringTemplate                       shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
701                 std::map<std::string, std::string>      specializations;
702
703                 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
704                 if (isSpirvAtomicNoLastArgOp(m_operation))
705                         specializations["LASTARG"] = "";
706
707                 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
708         }
709         else
710         {
711                 const string    versionDecl                             = glu::getGLSLVersionDeclaration(m_glslVersion);
712
713                 const UVec3             gridSize                                = getShaderGridSize(m_imageType, m_imageSize);
714                 const string    atomicCoord                             = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
715
716                 const string    atomicArgExpr                   = type + getAtomicFuncArgumentShaderStr(m_operation,
717                                                                                                                                                                                 "gx", "gy", "gz",
718                                                                                                                                                                                 IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
719
720                 const string    compareExchangeStr              = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
721                                                                                                 (componentWidth == 64 ?", 820338753304": ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "")
722                                                                                                 : "";
723                 const string    atomicInvocation                = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
724                 const string    shaderImageFormatStr    = getShaderImageFormatQualifier(m_format);
725                 const string    shaderImageTypeStr              = getShaderImageType(m_format, m_imageType);
726                 const string    extensions                              = "#extension GL_EXT_shader_atomic_float : enable\n#extension GL_KHR_memory_scope_semantics : enable  ";
727
728                 string source = versionDecl + "\n" + extensions + "\n";
729
730                 if (64 == componentWidth)
731                 {
732                         source +=       "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
733                                                 "#extension GL_EXT_shader_image_int64 : require\n";
734                 }
735
736                 source +=       "precision highp " + shaderImageTypeStr + ";\n"
737                                         "\n"
738                                         "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
739                                         "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
740                                         "\n"
741                                         "void main (void)\n"
742                                         "{\n"
743                                         "       int gx = int(gl_GlobalInvocationID.x);\n"
744                                         "       int gy = int(gl_GlobalInvocationID.y);\n"
745                                         "       int gz = int(gl_GlobalInvocationID.z);\n"
746                                         "       " + atomicInvocation + ";\n"
747                                         "}\n";
748
749                 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
750         }
751 }
752
753 class BinaryAtomicIntermValuesCase : public vkt::TestCase
754 {
755 public:
756                                                                 BinaryAtomicIntermValuesCase    (tcu::TestContext&                      testCtx,
757                                                                                                                                  const string&                          name,
758                                                                                                                                  const string&                          description,
759                                                                                                                                  const ImageType                        imageType,
760                                                                                                                                  const tcu::UVec3&                      imageSize,
761                                                                                                                                  const tcu::TextureFormat&      format,
762                                                                                                                                  const AtomicOperation          operation,
763                                                                                                                                  const bool                                     useTransfer,
764                                                                                                                                  const ShaderReadType           shaderReadType,
765                                                                                                                                  const ImageBackingType         backingType,
766                                                                                                                                  const glu::GLSLVersion         glslVersion);
767
768         void                                            initPrograms                                    (SourceCollections&                     sourceCollections) const;
769         TestInstance*                           createInstance                                  (Context&                                       context) const;
770         virtual void                            checkSupport                                    (Context&                                       context) const;
771
772 private:
773         const ImageType                         m_imageType;
774         const tcu::UVec3                        m_imageSize;
775         const tcu::TextureFormat        m_format;
776         const AtomicOperation           m_operation;
777         const bool                                      m_useTransfer;
778         const ShaderReadType            m_readType;
779         const ImageBackingType          m_backingType;
780         const glu::GLSLVersion          m_glslVersion;
781 };
782
783 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext&                        testCtx,
784                                                                                                                         const string&                   name,
785                                                                                                                         const string&                   description,
786                                                                                                                         const ImageType                 imageType,
787                                                                                                                         const tcu::UVec3&               imageSize,
788                                                                                                                         const TextureFormat&    format,
789                                                                                                                         const AtomicOperation   operation,
790                                                                                                                         const bool                              useTransfer,
791                                                                                                                         const ShaderReadType    shaderReadType,
792                                                                                                                         const ImageBackingType  backingType,
793                                                                                                                         const glu::GLSLVersion  glslVersion)
794         : TestCase              (testCtx, name, description)
795         , m_imageType   (imageType)
796         , m_imageSize   (imageSize)
797         , m_format              (format)
798         , m_operation   (operation)
799         , m_useTransfer (useTransfer)
800         , m_readType    (shaderReadType)
801         , m_backingType (backingType)
802         , m_glslVersion (glslVersion)
803 {
804 }
805
806 void BinaryAtomicIntermValuesCase::checkSupport (Context& context) const
807 {
808         commonCheckSupport(context, m_format, m_imageType, m_operation, m_useTransfer, m_readType, m_backingType);
809 }
810
811 void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
812 {
813         const VkFormat  imageFormat             = mapTextureFormat(m_format);
814         const deUint32  componentWidth  = getFormatComponentWidth(imageFormat, 0);
815         const bool              intFormat               = isIntFormat(imageFormat);
816         const bool              uintFormat              = isUintFormat(imageFormat);
817         const bool              floatFormat             = isFloatFormat(imageFormat);
818         const string    type                    = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
819         const string    vec4Type                = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
820
821         AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
822
823         if (isSpirvAtomicOperation(m_operation))
824         {
825                 const CaseVariant                                       caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
826                 const tcu::StringTemplate                       shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
827                 std::map<std::string, std::string>      specializations;
828
829                 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
830                 if (isSpirvAtomicNoLastArgOp(m_operation))
831                         specializations["LASTARG"] = "";
832
833                 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
834         }
835         else
836         {
837                 const string    versionDecl                             = glu::getGLSLVersionDeclaration(m_glslVersion);
838                 const UVec3             gridSize                                = getShaderGridSize(m_imageType, m_imageSize);
839                 const string    atomicCoord                             = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
840                 const string    invocationCoord                 = getCoordStr(m_imageType, "gx", "gy", "gz");
841                 const string    atomicArgExpr                   = type + getAtomicFuncArgumentShaderStr(m_operation,
842                                                                                                                                                                                 "gx", "gy", "gz",
843                                                                                                                                                                                 IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
844
845                 const string    compareExchangeStr              = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
846                                                                                                   (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "") :
847                                                                                                   "";
848                 const string    atomicInvocation                = getAtomicOperationShaderFuncName(m_operation) +
849                                                                                                 "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
850                 const string    shaderImageFormatStr    = getShaderImageFormatQualifier(m_format);
851                 const string    shaderImageTypeStr              = getShaderImageType(m_format, m_imageType);
852                 const string    extensions                              = "#extension GL_EXT_shader_atomic_float : enable\n#extension GL_KHR_memory_scope_semantics : enable  ";
853
854                 string source = versionDecl + "\n" + extensions + "\n"
855                                                 "\n";
856
857                 if (64 == componentWidth)
858                 {
859                         source +=       "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
860                                                 "#extension GL_EXT_shader_image_int64 : require\n";
861                 }
862
863                         source +=       "precision highp " + shaderImageTypeStr + "; \n"
864                                                 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
865                                                 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
866                                                 "layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
867                                                 "\n"
868                                                 "void main (void)\n"
869                                                 "{\n"
870                                                 "       int gx = int(gl_GlobalInvocationID.x);\n"
871                                                 "       int gy = int(gl_GlobalInvocationID.y);\n"
872                                                 "       int gz = int(gl_GlobalInvocationID.z);\n"
873                                                 "       imageStore(u_intermValuesImage, " + invocationCoord + ", " + vec4Type + "(" + atomicInvocation + "));\n"
874                                                 "}\n";
875
876                 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
877         }
878 }
879
880 class BinaryAtomicInstanceBase : public vkt::TestInstance
881 {
882 public:
883
884                                                                 BinaryAtomicInstanceBase (Context&                                              context,
885                                                                                                                   const string&                                 name,
886                                                                                                                   const ImageType                               imageType,
887                                                                                                                   const tcu::UVec3&                             imageSize,
888                                                                                                                   const TextureFormat&                  format,
889                                                                                                                   const AtomicOperation                 operation,
890                                                                                                                   const bool                                    useTransfer,
891                                                                                                                   const ShaderReadType                  shaderReadType,
892                                                                                                                   const ImageBackingType                backingType);
893
894         tcu::TestStatus                         iterate                                  (void);
895
896         virtual deUint32                        getOutputBufferSize              (void) const = 0;
897
898         virtual void                            prepareResources                 (const bool                                    useTransfer) = 0;
899         virtual void                            prepareDescriptors               (const bool                                    isTexelBuffer) = 0;
900
901         virtual void                            commandsBeforeCompute    (const VkCommandBuffer                 cmdBuffer) const = 0;
902         virtual void                            commandsAfterCompute     (const VkCommandBuffer                 cmdBuffer,
903                                                                                                                   const VkPipeline                              pipeline,
904                                                                                                                   const VkPipelineLayout                pipelineLayout,
905                                                                                                                    const VkDescriptorSet                descriptorSet,
906                                                                                                                   const VkDeviceSize&                   range,
907                                                                                                                   const bool                                    useTransfer) = 0;
908
909         virtual bool                            verifyResult                     (Allocation&                                   outputBufferAllocation,
910                                                                                                                   const bool                                    is64Bit) const = 0;
911
912 protected:
913
914         void                                            shaderFillImage                  (const VkCommandBuffer                 cmdBuffer,
915                                                                                                                   const VkBuffer&                               buffer,
916                                                                                                                   const VkPipeline                              pipeline,
917                                                                                                                   const VkPipelineLayout                pipelineLayout,
918                                                                                                                   const VkDescriptorSet                 descriptorSet,
919                                                                                                                   const VkDeviceSize&                   range,
920                                                                                                                   const tcu::UVec3&                             gridSize);
921
922         void                                            createImageAndView              (VkFormat                                               imageFormat,
923                                                                                                                  const tcu::UVec3&                              imageExent,
924                                                                                                                  bool                                                   useTransfer,
925                                                                                                                  de::MovePtr<Image>&                    imagePtr,
926                                                                                                                  Move<VkImageView>&                             imageViewPtr);
927
928         void                                            createImageResources    (const VkFormat&                                imageFormat,
929                                                                                                                  const bool                                             useTransfer);
930
931         const string                            m_name;
932         const ImageType                         m_imageType;
933         const tcu::UVec3                        m_imageSize;
934         const TextureFormat                     m_format;
935         const AtomicOperation           m_operation;
936         const bool                                      m_useTransfer;
937         const ShaderReadType            m_readType;
938         const ImageBackingType          m_backingType;
939
940         de::MovePtr<Buffer>                     m_inputBuffer;
941         de::MovePtr<Buffer>                     m_outputBuffer;
942         Move<VkBufferView>                      m_descResultBufferView;
943         Move<VkBufferView>                      m_descIntermResultsBufferView;
944         Move<VkDescriptorPool>          m_descriptorPool;
945         Move<VkDescriptorSetLayout>     m_descriptorSetLayout;
946         Move<VkDescriptorSet>           m_descriptorSet;
947
948         Move<VkDescriptorSetLayout>     m_descriptorSetLayoutNoTransfer;
949         Move<VkDescriptorPool>          m_descriptorPoolNoTransfer;
950
951         de::MovePtr<Image>                      m_resultImage;
952         Move<VkImageView>                       m_resultImageView;
953
954         std::vector<VkSemaphore>        m_waitSemaphores;
955 };
956
957 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context&                            context,
958                                                                                                         const string&                   name,
959                                                                                                         const ImageType                 imageType,
960                                                                                                         const tcu::UVec3&               imageSize,
961                                                                                                         const TextureFormat&    format,
962                                                                                                         const AtomicOperation   operation,
963                                                                                                         const bool                              useTransfer,
964                                                                                                         const ShaderReadType    shaderReadType,
965                                                                                                         const ImageBackingType  backingType)
966         : vkt::TestInstance     (context)
967         , m_name                        (name)
968         , m_imageType           (imageType)
969         , m_imageSize           (imageSize)
970         , m_format                      (format)
971         , m_operation           (operation)
972         , m_useTransfer         (useTransfer)
973         , m_readType            (shaderReadType)
974         , m_backingType         (backingType)
975 {
976 }
977
978 tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
979 {
980         const VkDevice                  device                          = m_context.getDevice();
981         const DeviceInterface&  deviceInterface         = m_context.getDeviceInterface();
982         const VkQueue                   queue                           = m_context.getUniversalQueue();
983         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
984         Allocator&                              allocator                       = m_context.getDefaultAllocator();
985         const VkDeviceSize              imageSizeInBytes        = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
986         const VkDeviceSize              outBuffSizeInBytes      = getOutputBufferSize();
987         const VkFormat                  imageFormat                     = mapTextureFormat(m_format);
988         const bool                              isTexelBuffer           = (m_imageType == IMAGE_TYPE_BUFFER);
989
990         if (!isTexelBuffer)
991         {
992                 createImageResources(imageFormat, m_useTransfer);
993         }
994
995         tcu::UVec3                              gridSize                        = getShaderGridSize(m_imageType, m_imageSize);
996
997         //Prepare the buffer with the initial data for the image
998         m_inputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface,
999                                                                                                         device,
1000                                                                                                         allocator,
1001                                                                                                         makeBufferCreateInfo(imageSizeInBytes,
1002                                                                                                                                                  VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
1003                                                                                                                                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1004                                                                                                                                                  (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
1005                                                                                                         MemoryRequirement::HostVisible));
1006
1007         // Fill in buffer with initial data used for image.
1008         initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);
1009
1010         // Create a buffer to store shader output copied from result image
1011         m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface,
1012                                                                                                         device,
1013                                                                                                         allocator,
1014                                                                                                         makeBufferCreateInfo(outBuffSizeInBytes,
1015                                                                                                                                                  VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1016                                                                                                                                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1017                                                                                                                                                  (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
1018                                                                                                         MemoryRequirement::HostVisible));
1019
1020         if (!isTexelBuffer)
1021         {
1022                 prepareResources(m_useTransfer);
1023         }
1024
1025         prepareDescriptors(isTexelBuffer);
1026
1027         Move<VkDescriptorSet>   descriptorSetFillImage;
1028         Move<VkShaderModule>    shaderModuleFillImage;
1029         Move<VkPipelineLayout>  pipelineLayoutFillImage;
1030         Move<VkPipeline>                pipelineFillImage;
1031
1032         Move<VkDescriptorSet>   descriptorSetReadImage;
1033         Move<VkShaderModule>    shaderModuleReadImage;
1034         Move<VkPipelineLayout>  pipelineLayoutReadImage;
1035         Move<VkPipeline>                pipelineReadImage;
1036
1037         if (!m_useTransfer)
1038         {
1039                 m_descriptorSetLayoutNoTransfer =
1040                         DescriptorSetLayoutBuilder()
1041                         .addSingleBinding((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), VK_SHADER_STAGE_COMPUTE_BIT)
1042                         .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1043                         .build(deviceInterface, device);
1044
1045                 m_descriptorPoolNoTransfer =
1046                         DescriptorPoolBuilder()
1047                         .addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), 2)
1048                         .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
1049                         .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);
1050
1051                 descriptorSetFillImage = makeDescriptorSet(deviceInterface,
1052                         device,
1053                         *m_descriptorPoolNoTransfer,
1054                         *m_descriptorSetLayoutNoTransfer);
1055
1056                 descriptorSetReadImage = makeDescriptorSet(deviceInterface,
1057                         device,
1058                         *m_descriptorPoolNoTransfer,
1059                         *m_descriptorSetLayoutNoTransfer);
1060
1061                 shaderModuleFillImage   = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
1062                 pipelineLayoutFillImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1063                 pipelineFillImage               = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);
1064
1065                 if (m_readType == ShaderReadType::SPARSE)
1066                 {
1067                         shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShaderResidency"), 0);
1068                 }
1069                 else
1070                 {
1071                         shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
1072                 }
1073                 pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1074                 pipelineReadImage               = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleReadImage);
1075         }
1076
1077         // Create pipeline
1078         const Unique<VkShaderModule>    shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
1079         const Unique<VkPipelineLayout>  pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
1080         const Unique<VkPipeline>                pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
1081
1082         // Create command buffer
1083         const Unique<VkCommandPool>             cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
1084         const Unique<VkCommandBuffer>   cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1085
1086         beginCommandBuffer(deviceInterface, *cmdBuffer);
1087
1088         if (!isTexelBuffer)
1089         {
1090                 if (m_useTransfer)
1091                 {
1092                         const vector<VkBufferImageCopy> bufferImageCopy(1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize)));
1093                         copyBufferToImage(deviceInterface,
1094                                                           *cmdBuffer,
1095                                                           *(*m_inputBuffer),
1096                                                           imageSizeInBytes,
1097                                                           bufferImageCopy,
1098                                                           VK_IMAGE_ASPECT_COLOR_BIT,
1099                                                           1,
1100                                                           getNumLayers(m_imageType, m_imageSize), m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
1101                 }
1102                 else
1103                 {
1104                         shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage, *descriptorSetFillImage, imageSizeInBytes, gridSize);
1105                 }
1106                 commandsBeforeCompute(*cmdBuffer);
1107         }
1108
1109         deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1110         deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
1111
1112         deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1113
1114         commandsAfterCompute(*cmdBuffer,
1115                                                  *pipelineReadImage,
1116                                                  *pipelineLayoutReadImage,
1117                                                  *descriptorSetReadImage,
1118                                                  outBuffSizeInBytes,
1119                                                  m_useTransfer);
1120
1121         const VkBufferMemoryBarrier     outputBufferPreHostReadBarrier
1122                 = makeBufferMemoryBarrier(((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
1123                                                                   VK_ACCESS_HOST_READ_BIT,
1124                                                                   m_outputBuffer->get(),
1125                                                                   0ull,
1126                                                                   outBuffSizeInBytes);
1127
1128         deviceInterface.cmdPipelineBarrier(*cmdBuffer,
1129                                                                            ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
1130                                                                            VK_PIPELINE_STAGE_HOST_BIT,
1131                                                                            DE_FALSE, 0u, DE_NULL,
1132                                                                            1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
1133
1134         endCommandBuffer(deviceInterface, *cmdBuffer);
1135
1136         std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
1137         submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
1138                 static_cast<deUint32>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores), de::dataOrNull(waitStages));
1139
1140         Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();
1141
1142         invalidateAlloc(deviceInterface, device, outputBufferAllocation);
1143
1144         if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
1145                 return tcu::TestStatus::pass("Comparison succeeded");
1146         else
1147                 return tcu::TestStatus::fail("Comparison failed");
1148 }
1149
1150 void BinaryAtomicInstanceBase::shaderFillImage (const VkCommandBuffer   cmdBuffer,
1151                                                                                                 const VkBuffer&                 buffer,
1152                                                                                                 const VkPipeline                pipeline,
1153                                                                                                 const VkPipelineLayout  pipelineLayout,
1154                                                                                                 const VkDescriptorSet   descriptorSet,
1155                                                                                                 const VkDeviceSize&             range,
1156                                                                                                 const tcu::UVec3&               gridSize)
1157 {
1158         const VkDevice                                  device                                  = m_context.getDevice();
1159         const DeviceInterface&                  deviceInterface                 = m_context.getDeviceInterface();
1160         const VkDescriptorImageInfo             descResultImageInfo             = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1161         const VkDescriptorBufferInfo    descResultBufferInfo    = makeDescriptorBufferInfo(buffer, 0, range);
1162         const VkImageSubresourceRange   subresourceRange                = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1163
1164         DescriptorSetUpdateBuilder()
1165                 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1166                 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1167                 .update(deviceInterface, device);
1168
1169         const VkImageMemoryBarrier imageBarrierPre = makeImageMemoryBarrier(0,
1170                                                                                                                                                 VK_ACCESS_SHADER_WRITE_BIT,
1171                                                                                                                                                 VK_IMAGE_LAYOUT_UNDEFINED,
1172                                                                                                                                                 VK_IMAGE_LAYOUT_GENERAL,
1173                                                                                                                                                 m_resultImage->get(),
1174                                                                                                                                                 subresourceRange);
1175
1176         deviceInterface.cmdPipelineBarrier(     cmdBuffer,
1177                                                                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1178                                                                                 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1179                                                                                 (VkDependencyFlags)0,
1180                                                                                 0, (const VkMemoryBarrier*)DE_NULL,
1181                                                                                 0, (const VkBufferMemoryBarrier*)DE_NULL,
1182                                                                                 1, &imageBarrierPre);
1183
1184         deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1185         deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1186
1187         deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());
1188
1189         const VkImageMemoryBarrier imageBarrierPost = makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
1190                                                                                                                                                  VK_ACCESS_SHADER_READ_BIT,
1191                                                                                                                                                  VK_IMAGE_LAYOUT_GENERAL,
1192                                                                                                                                                  VK_IMAGE_LAYOUT_GENERAL,
1193                                                                                                                                                  m_resultImage->get(),
1194                                                                                                                                                  subresourceRange);
1195
1196         deviceInterface.cmdPipelineBarrier(     cmdBuffer,
1197                                                                                 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1198                                                                                 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1199                                                                                 (VkDependencyFlags)0,
1200                                                                                 0, (const VkMemoryBarrier*)DE_NULL,
1201                                                                                 0, (const VkBufferMemoryBarrier*)DE_NULL,
1202                                                                                 1, &imageBarrierPost);
1203 }
1204
1205 void BinaryAtomicInstanceBase::createImageAndView       (VkFormat                                               imageFormat,
1206                                                                                                          const tcu::UVec3&                              imageExent,
1207                                                                                                          bool                                                   useTransfer,
1208                                                                                                          de::MovePtr<Image>&                    imagePtr,
1209                                                                                                          Move<VkImageView>&                             imageViewPtr)
1210 {
1211         const VkDevice                  device                  = m_context.getDevice();
1212         const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
1213         Allocator&                              allocator               = m_context.getDefaultAllocator();
1214         const VkImageUsageFlags usageFlags              = getUsageFlags(useTransfer);
1215         VkImageCreateFlags              createFlags             = 0u;
1216
1217         if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
1218                 createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
1219
1220         const auto numLayers = getNumLayers(m_imageType, m_imageSize);
1221
1222         VkImageCreateInfo createInfo =
1223         {
1224                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                                    // VkStructureType                      sType;
1225                 DE_NULL,                                                                                                // const void*                          pNext;
1226                 createFlags,                                                                                    // VkImageCreateFlags           flags;
1227                 mapImageType(m_imageType),                                                              // VkImageType                          imageType;
1228                 imageFormat,                                                                                    // VkFormat                                     format;
1229                 makeExtent3D(imageExent),                                                               // VkExtent3D                           extent;
1230                 1u,                                                                                                             // deUint32                                     mipLevels;
1231                 numLayers,                                                                                              // deUint32                                     arrayLayers;
1232                 VK_SAMPLE_COUNT_1_BIT,                                                                  // VkSampleCountFlagBits        samples;
1233                 VK_IMAGE_TILING_OPTIMAL,                                                                // VkImageTiling                        tiling;
1234                 usageFlags,                                                                                             // VkImageUsageFlags            usage;
1235                 VK_SHARING_MODE_EXCLUSIVE,                                                              // VkSharingMode                        sharingMode;
1236                 0u,                                                                                                             // deUint32                                     queueFamilyIndexCount;
1237                 DE_NULL,                                                                                                // const deUint32*                      pQueueFamilyIndices;
1238                 VK_IMAGE_LAYOUT_UNDEFINED,                                                              // VkImageLayout                        initialLayout;
1239         };
1240
1241         if (m_backingType == ImageBackingType::SPARSE)
1242         {
1243                 const auto&             vki                             = m_context.getInstanceInterface();
1244                 const auto              physicalDevice  = m_context.getPhysicalDevice();
1245                 const auto              sparseQueue             = m_context.getSparseQueue();
1246                 const auto              sparseQueueIdx  = m_context.getSparseQueueFamilyIndex();
1247                 const auto              universalQIdx   = m_context.getUniversalQueueFamilyIndex();
1248                 const deUint32  queueIndices[]  = { universalQIdx, sparseQueueIdx };
1249
1250                 createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
1251
1252                 if (sparseQueueIdx != universalQIdx)
1253                 {
1254                         createInfo.sharingMode                          = VK_SHARING_MODE_CONCURRENT;
1255                         createInfo.queueFamilyIndexCount        = static_cast<deUint32>(DE_LENGTH_OF_ARRAY(queueIndices));
1256                         createInfo.pQueueFamilyIndices          = queueIndices;
1257                 }
1258
1259                 const auto sparseImage = new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
1260                 m_waitSemaphores.push_back(sparseImage->getSemaphore());
1261                 imagePtr = de::MovePtr<Image>(sparseImage);
1262         }
1263         else
1264                 imagePtr = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));
1265
1266         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);
1267
1268         imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat, subresourceRange);
1269 }
1270
1271 void BinaryAtomicInstanceBase::createImageResources (const VkFormat&    imageFormat,
1272                                                                                                          const bool                     useTransfer)
1273 {
1274         //Create the image that is going to store results of atomic operations
1275         createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage, m_resultImageView);
1276 }
1277
1278 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1279 {
1280 public:
1281
1282                                                 BinaryAtomicEndResultInstance  (Context&                                        context,
1283                                                                                                                 const string&                           name,
1284                                                                                                                 const ImageType                         imageType,
1285                                                                                                                 const tcu::UVec3&                       imageSize,
1286                                                                                                                 const TextureFormat&            format,
1287                                                                                                                 const AtomicOperation           operation,
1288                                                                                                                 const bool                                      useTransfer,
1289                                                                                                                 const ShaderReadType            shaderReadType,
1290                                                                                                                 const ImageBackingType          backingType)
1291                                                         : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation, useTransfer, shaderReadType, backingType) {}
1292
1293         virtual deUint32        getOutputBufferSize                        (void) const;
1294
1295         virtual void            prepareResources                           (const bool                                  useTransfer) { DE_UNREF(useTransfer); }
1296         virtual void            prepareDescriptors                         (const bool                                  isTexelBuffer);
1297
1298         virtual void            commandsBeforeCompute              (const VkCommandBuffer) const {}
1299         virtual void            commandsAfterCompute               (const VkCommandBuffer               cmdBuffer,
1300                                                                                                                 const VkPipeline                        pipeline,
1301                                                                                                                 const VkPipelineLayout          pipelineLayout,
1302                                                                                                                 const VkDescriptorSet           descriptorSet,
1303                                                                                                                 const VkDeviceSize&                     range,
1304                                                                                                                 const bool                                      useTransfer);
1305
1306         virtual bool            verifyResult                               (Allocation&                                 outputBufferAllocation,
1307                                                                                                                 const bool                                      is64Bit) const;
1308
1309 protected:
1310
1311         template <typename T>
1312         bool                            isValueCorrect                             (const T                                             resultValue,
1313                                                                                                                 deInt32                                         x,
1314                                                                                                                 deInt32                                         y,
1315                                                                                                                 deInt32                                         z,
1316                                                                                                                 const UVec3&                            gridSize,
1317                                                                                                                 const IVec3                                     extendedGridSize) const;
1318 };
1319
1320 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
1321 {
1322         return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1323 }
1324
1325 void BinaryAtomicEndResultInstance::prepareDescriptors (const bool      isTexelBuffer)
1326 {
1327         const VkDescriptorType  descriptorType  = isTexelBuffer ?
1328                                                                                         VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1329                                                                                         VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1330         const VkDevice                  device                  = m_context.getDevice();
1331         const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
1332
1333         m_descriptorSetLayout =
1334                 DescriptorSetLayoutBuilder()
1335                 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1336                 .build(deviceInterface, device);
1337
1338         m_descriptorPool =
1339                 DescriptorPoolBuilder()
1340                 .addType(descriptorType)
1341                 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1342
1343         m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1344
1345         if (isTexelBuffer)
1346         {
1347                 m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1348
1349                 DescriptorSetUpdateBuilder()
1350                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1351                         .update(deviceInterface, device);
1352         }
1353         else
1354         {
1355                 const VkDescriptorImageInfo     descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1356
1357                 DescriptorSetUpdateBuilder()
1358                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1359                         .update(deviceInterface, device);
1360         }
1361 }
1362
1363 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer         cmdBuffer,
1364                                                                                                                   const VkPipeline                      pipeline,
1365                                                                                                                   const VkPipelineLayout        pipelineLayout,
1366                                                                                                                   const VkDescriptorSet         descriptorSet,
1367                                                                                                                   const VkDeviceSize&           range,
1368                                                                                                                   const bool                            useTransfer)
1369 {
1370         const DeviceInterface&                  deviceInterface         = m_context.getDeviceInterface();
1371         const VkImageSubresourceRange   subresourceRange        = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1372         const UVec3                                             layerSize                       = getLayerSize(m_imageType, m_imageSize);
1373
1374         if (m_imageType == IMAGE_TYPE_BUFFER)
1375         {
1376                 m_outputBuffer = m_inputBuffer;
1377         }
1378         else if (useTransfer)
1379         {
1380                 const VkImageMemoryBarrier      resultImagePostDispatchBarrier =
1381                         makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1382                                                                         VK_ACCESS_TRANSFER_READ_BIT,
1383                                                                         VK_IMAGE_LAYOUT_GENERAL,
1384                                                                         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1385                                                                         m_resultImage->get(),
1386                                                                         subresourceRange);
1387
1388                 deviceInterface.cmdPipelineBarrier(     cmdBuffer,
1389                                                                                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1390                                                                                         VK_PIPELINE_STAGE_TRANSFER_BIT,
1391                                                                                         DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1392                                                                                         1u, &resultImagePostDispatchBarrier);
1393
1394                 const VkBufferImageCopy         bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));
1395
1396                 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1397         }
1398         else
1399         {
1400                 const VkDevice                                  device                                  = m_context.getDevice();
1401                 const VkDescriptorImageInfo             descResultImageInfo             = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1402                 const VkDescriptorBufferInfo    descResultBufferInfo    = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1403
1404                 DescriptorSetUpdateBuilder()
1405                         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1406                         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1407                         .update(deviceInterface, device);
1408
1409                 const VkImageMemoryBarrier      resultImagePostDispatchBarrier =
1410                         makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1411                                                                         VK_ACCESS_SHADER_READ_BIT,
1412                                                                         VK_IMAGE_LAYOUT_GENERAL,
1413                                                                         VK_IMAGE_LAYOUT_GENERAL,
1414                                                                         m_resultImage->get(),
1415                                                                         subresourceRange);
1416
1417                 deviceInterface.cmdPipelineBarrier(     cmdBuffer,
1418                                                                                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1419                                                                                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1420                                                                                         DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1421                                                                                         1u, &resultImagePostDispatchBarrier);
1422
1423                 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1424                 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1425
1426                 switch (m_imageType)
1427                 {
1428                         case IMAGE_TYPE_1D_ARRAY:
1429                                 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
1430                                 break;
1431                         case IMAGE_TYPE_2D_ARRAY:
1432                         case IMAGE_TYPE_CUBE:
1433                         case IMAGE_TYPE_CUBE_ARRAY:
1434                                 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1435                                 break;
1436                         default:
1437                                 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
1438                                 break;
1439                 }
1440         }
1441 }
1442
1443 bool BinaryAtomicEndResultInstance::verifyResult (Allocation&   outputBufferAllocation,
1444                                                                                                   const bool    is64Bit) const
1445 {
1446         const UVec3     gridSize                        = getShaderGridSize(m_imageType, m_imageSize);
1447         const IVec3 extendedGridSize    = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1448
1449         tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
1450
1451         for (deInt32 z = 0; z < resultBuffer.getDepth();  z++)
1452         for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1453         for (deInt32 x = 0; x < resultBuffer.getWidth();  x++)
1454         {
1455                 const void* resultValue = resultBuffer.getPixelPtr(x, y, z);
1456                 deUint32 floatToUnsignedValue = 0;
1457                 bool isFloatValue = false;
1458                 if (isFloatFormat(mapTextureFormat(m_format)))
1459                 {
1460                         isFloatValue = true;
1461                         floatToUnsignedValue = static_cast<deUint32>(*((float*)resultValue));
1462                 }
1463
1464                 if (isOrderIndependentAtomicOperation(m_operation))
1465                 {
1466                         if (isUintFormat(mapTextureFormat(m_format)))
1467                         {
1468                                 if(is64Bit)
1469                                 {
1470                                         if (!isValueCorrect<deUint64>(*((deUint64*)resultValue), x, y, z, gridSize, extendedGridSize))
1471                                                 return false;
1472                                 }
1473                                 else
1474                                 {
1475                                         if (!isValueCorrect<deUint32>(*((deUint32*)resultValue), x, y, z, gridSize, extendedGridSize))
1476                                                 return false;
1477                                 }
1478                         }
1479                         else if (isIntFormat(mapTextureFormat(m_format)))
1480                         {
1481                                 if (is64Bit)
1482                                 {
1483                                         if (!isValueCorrect<deInt64>(*((deInt64*)resultValue), x, y, z, gridSize, extendedGridSize))
1484                                                 return false;
1485                                 }
1486                                 else
1487                                 {
1488                                         if (!isValueCorrect<deInt32>(*((deInt32*)resultValue), x, y, z, gridSize, extendedGridSize))
1489                                                 return false;
1490                                 }
1491                         }
1492                         else
1493                         {
1494                                 // 32-bit floating point
1495                                 if (!isValueCorrect<deUint32>(floatToUnsignedValue, x, y, z, gridSize, extendedGridSize))
1496                                         return false;
1497                         }
1498                 }
1499                 else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
1500                 {
1501                         // Check if the end result equals one of the atomic args.
1502                         bool matchFound = false;
1503
1504                         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1505                         {
1506                                 const IVec3 gid(x + i*gridSize.x(), y, z);
1507                                 matchFound = is64Bit ?
1508                                         (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1509                                         isFloatValue ?
1510                                         floatToUnsignedValue == getAtomicFuncArgument<deUint32>(m_operation, gid, extendedGridSize) :
1511                                         (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1512
1513                         }
1514
1515                         if (!matchFound)
1516                                 return false;
1517                 }
1518                 else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
1519                 {
1520                         // Check if the end result equals one of the atomic args.
1521                         bool matchFound = false;
1522
1523                         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1524                         {
1525                                 const IVec3 gid(x + i*gridSize.x(), y, z);
1526                                 matchFound = is64Bit ?
1527                                         (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1528                                         isFloatValue ?
1529                                         floatToUnsignedValue == getAtomicFuncArgument<deUint32>(m_operation, gid, extendedGridSize) :
1530                                         (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1531                         }
1532
1533                         if (!matchFound)
1534                                 return false;
1535                 }
1536                 else
1537                         DE_ASSERT(false);
1538         }
1539         return true;
1540 }
1541
1542 template <typename T>
1543 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1544 {
1545         T reference = getOperationInitialValue<T>(m_operation);
1546         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1547         {
1548                 const IVec3 gid(x + i*gridSize.x(), y, z);
1549                 T                       arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1550                 reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1551         }
1552         return (resultValue == reference);
1553 }
1554
1555 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
1556 {
1557         return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation, m_useTransfer, m_readType, m_backingType);
1558 }
1559
1560 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
1561 {
1562 public:
1563
1564                                                 BinaryAtomicIntermValuesInstance   (Context&                            context,
1565                                                                                                                         const string&                   name,
1566                                                                                                                         const ImageType                 imageType,
1567                                                                                                                         const tcu::UVec3&               imageSize,
1568                                                                                                                         const TextureFormat&    format,
1569                                                                                                                         const AtomicOperation   operation,
1570                                                                                                                         const bool                              useTransfer,
1571                                                                                                                         const ShaderReadType    shaderReadType,
1572                                                                                                                         const ImageBackingType  backingType)
1573                                                         : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation, useTransfer, shaderReadType, backingType) {}
1574
1575         virtual deUint32        getOutputBufferSize                                (void) const;
1576
1577         virtual void            prepareResources                                   (const bool                          useTransfer);
1578         virtual void            prepareDescriptors                                 (const bool                          isTexelBuffer);
1579
1580         virtual void            commandsBeforeCompute                      (const VkCommandBuffer       cmdBuffer) const;
1581         virtual void            commandsAfterCompute                       (const VkCommandBuffer       cmdBuffer,
1582                                                                                                                         const VkPipeline                pipeline,
1583                                                                                                                         const VkPipelineLayout  pipelineLayout,
1584                                                                                                                         const VkDescriptorSet   descriptorSet,
1585                                                                                                                         const VkDeviceSize&             range,
1586                                                                                                                         const bool                              useTransfer);
1587
1588         virtual bool            verifyResult                                       (Allocation&                         outputBufferAllocation,
1589                                                                                                                         const bool                              is64Bit) const;
1590
1591 protected:
1592
1593         template <typename T>
1594         bool                            areValuesCorrect                                   (tcu::ConstPixelBufferAccess& resultBuffer,
1595                                                                                                                         const bool isFloatingPoint,
1596                                                                                                                         deInt32 x,
1597                                                                                                                         deInt32 y,
1598                                                                                                                         deInt32 z,
1599                                                                                                                         const UVec3& gridSize,
1600                                                                                                                         const IVec3 extendedGridSize) const;
1601
1602         template <typename T>
1603         bool                            verifyRecursive                                    (const deInt32                       index,
1604                                                                                                                         const T                                 valueSoFar,
1605                                                                                                                         bool                                    argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1606                                                                                                                         const T                                 atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1607                                                                                                                         const T                                 resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
1608         de::MovePtr<Image>      m_intermResultsImage;
1609         Move<VkImageView>       m_intermResultsImageView;
1610 };
1611
1612 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
1613 {
1614         return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1615 }
1616
1617 void BinaryAtomicIntermValuesInstance::prepareResources (const bool useTransfer)
1618 {
1619         const UVec3 layerSize                   = getLayerSize(m_imageType, m_imageSize);
1620         const bool  isCubeBasedImage    = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
1621         const UVec3 extendedLayerSize   = isCubeBasedImage      ? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
1622                                                                                                                 : UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1623
1624         createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage, m_intermResultsImageView);
1625 }
1626
1627 void BinaryAtomicIntermValuesInstance::prepareDescriptors (const bool   isTexelBuffer)
1628 {
1629         const VkDescriptorType  descriptorType  = isTexelBuffer ?
1630                                                                                         VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1631                                                                                         VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1632
1633         const VkDevice                  device                  = m_context.getDevice();
1634         const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
1635
1636         m_descriptorSetLayout =
1637                 DescriptorSetLayoutBuilder()
1638                 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1639                 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1640                 .build(deviceInterface, device);
1641
1642         m_descriptorPool =
1643                 DescriptorPoolBuilder()
1644                 .addType(descriptorType, 2u)
1645                 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1646
1647         m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1648
1649         if (isTexelBuffer)
1650         {
1651                 m_descResultBufferView                  = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1652                 m_descIntermResultsBufferView   = makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1653
1654                 DescriptorSetUpdateBuilder()
1655                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1656                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &(m_descIntermResultsBufferView.get()))
1657                         .update(deviceInterface, device);
1658         }
1659         else
1660         {
1661                 const VkDescriptorImageInfo     descResultImageInfo                     = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1662                 const VkDescriptorImageInfo     descIntermResultsImageInfo      = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1663
1664                 DescriptorSetUpdateBuilder()
1665                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1666                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &descIntermResultsImageInfo)
1667                         .update(deviceInterface, device);
1668         }
1669 }
1670
1671 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
1672 {
1673         const DeviceInterface&                  deviceInterface         = m_context.getDeviceInterface();
1674         const VkImageSubresourceRange   subresourceRange        = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1675
1676         const VkImageMemoryBarrier      imagePreDispatchBarrier =
1677                 makeImageMemoryBarrier( 0u,
1678                                                                 VK_ACCESS_SHADER_WRITE_BIT,
1679                                                                 VK_IMAGE_LAYOUT_UNDEFINED,
1680                                                                 VK_IMAGE_LAYOUT_GENERAL,
1681                                                                 m_intermResultsImage->get(),
1682                                                                 subresourceRange);
1683
1684         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
1685 }
1686
1687 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer              cmdBuffer,
1688                                                                                                                          const VkPipeline                       pipeline,
1689                                                                                                                          const VkPipelineLayout         pipelineLayout,
1690                                                                                                                          const VkDescriptorSet          descriptorSet,
1691                                                                                                                          const VkDeviceSize&            range,
1692                                                                                                                          const bool                                     useTransfer)
1693 {
1694         // nothing is needed for texel image buffer
1695         if (m_imageType == IMAGE_TYPE_BUFFER)
1696                 return;
1697
1698         const DeviceInterface&                  deviceInterface         = m_context.getDeviceInterface();
1699         const VkImageSubresourceRange   subresourceRange        = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1700         const UVec3                                             layerSize                       = getLayerSize(m_imageType, m_imageSize);
1701
1702         if (useTransfer)
1703         {
1704                 const VkImageMemoryBarrier      imagePostDispatchBarrier =
1705                         makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1706                                                                         VK_ACCESS_TRANSFER_READ_BIT,
1707                                                                         VK_IMAGE_LAYOUT_GENERAL,
1708                                                                         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1709                                                                         m_intermResultsImage->get(),
1710                                                                         subresourceRange);
1711
1712                 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
1713
1714                 const UVec3                                     extendedLayerSize               = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1715                 const VkBufferImageCopy         bufferImageCopyParams   = makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
1716
1717                 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1718         }
1719         else
1720         {
1721                 const VkDevice                                  device                                  = m_context.getDevice();
1722                 const VkDescriptorImageInfo             descResultImageInfo             = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1723                 const VkDescriptorBufferInfo    descResultBufferInfo    = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1724
1725                 DescriptorSetUpdateBuilder()
1726                         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1727                         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1728                         .update(deviceInterface, device);
1729
1730                 const VkImageMemoryBarrier      resultImagePostDispatchBarrier =
1731                 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1732                                                                 VK_ACCESS_SHADER_READ_BIT,
1733                                                                 VK_IMAGE_LAYOUT_GENERAL,
1734                                                                 VK_IMAGE_LAYOUT_GENERAL,
1735                                                                 m_intermResultsImage->get(),
1736                                                                 subresourceRange);
1737
1738                 deviceInterface.cmdPipelineBarrier(     cmdBuffer,
1739                                                                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1740                                                                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1741                                                                         DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1742                                                                         1u, &resultImagePostDispatchBarrier);
1743
1744                 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1745                 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1746
1747                 switch (m_imageType)
1748                 {
1749                         case IMAGE_TYPE_1D_ARRAY:
1750                                 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), subresourceRange.layerCount, layerSize.z());
1751                                 break;
1752                         case IMAGE_TYPE_2D_ARRAY:
1753                         case IMAGE_TYPE_CUBE:
1754                         case IMAGE_TYPE_CUBE_ARRAY:
1755                                 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1756                                 break;
1757                         default:
1758                                 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1759                                 break;
1760                 }
1761         }
1762 }
1763
1764 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation&        outputBufferAllocation,
1765                                                                                                          const bool             is64Bit) const
1766 {
1767         const UVec3     gridSize                 = getShaderGridSize(m_imageType, m_imageSize);
1768         const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1769
1770         tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
1771
1772         for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1773         for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1774         for (deUint32 x = 0; x < gridSize.x(); x++)
1775         {
1776                 if (isUintFormat(mapTextureFormat(m_format)))
1777                 {
1778                         if (is64Bit)
1779                         {
1780                                 if (!areValuesCorrect<deUint64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1781                                         return false;
1782                         }
1783                         else
1784                         {
1785                                 if (!areValuesCorrect<deUint32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1786                                         return false;
1787                         }
1788                 }
1789                 else if (isIntFormat(mapTextureFormat(m_format)))
1790                 {
1791                         if (is64Bit)
1792                         {
1793                                 if (!areValuesCorrect<deInt64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1794                                         return false;
1795                         }
1796                         else
1797                         {
1798                                 if (!areValuesCorrect<deInt32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1799                                         return false;
1800                         }
1801                 }
1802                 else
1803                 {
1804                         // 32-bit floating point
1805                         if (!areValuesCorrect<deUint32>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
1806                                 return false;
1807                 }
1808         }
1809
1810         return true;
1811 }
1812
1813 template <typename T>
1814 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess& resultBuffer, const bool isFloatingPoint, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1815 {
1816         T               resultValues[NUM_INVOCATIONS_PER_PIXEL];
1817         T               atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1818         bool    argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1819
1820         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1821         {
1822                 IVec3 gid(x + i*gridSize.x(), y, z);
1823                 T data = *((T*)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
1824                 if (isFloatingPoint)
1825                 {
1826                         float fData;
1827                         deMemcpy(&fData, &data, sizeof(fData));
1828                         data = static_cast<T>(fData);
1829                 }
1830                 resultValues[i] = data;
1831                 atomicArgs[i]   = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1832                 argsUsed[i]             = false;
1833         }
1834
1835         // Verify that the return values form a valid sequence.
1836         return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
1837 }
1838
1839 template <typename T>
1840 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32   index,
1841                                                                                                                 const T                 valueSoFar,
1842                                                                                                                 bool                    argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1843                                                                                                                 const T                 atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1844                                                                                                                 const T                 resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
1845 {
1846         if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
1847                 return true;
1848
1849         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1850         {
1851                 if (!argsUsed[i] && resultValues[i] == valueSoFar)
1852                 {
1853                         argsUsed[i] = true;
1854
1855                         if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
1856                         {
1857                                 return true;
1858                         }
1859
1860                         argsUsed[i] = false;
1861                 }
1862         }
1863
1864         return false;
1865 }
1866
1867 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
1868 {
1869         return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation, m_useTransfer, m_readType, m_backingType);
1870 }
1871
1872 } // anonymous ns
1873
1874 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
1875 {
1876         de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
1877
1878         struct ImageParams
1879         {
1880                 ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
1881                         : m_imageType   (imageType)
1882                         , m_imageSize   (imageSize)
1883                 {
1884                 }
1885                 const ImageType         m_imageType;
1886                 const tcu::UVec3        m_imageSize;
1887         };
1888
1889         const ImageParams imageParamsArray[] =
1890         {
1891                 ImageParams(IMAGE_TYPE_1D,                      tcu::UVec3(64u, 1u, 1u)),
1892                 ImageParams(IMAGE_TYPE_1D_ARRAY,        tcu::UVec3(64u, 1u, 8u)),
1893                 ImageParams(IMAGE_TYPE_2D,                      tcu::UVec3(64u, 64u, 1u)),
1894                 ImageParams(IMAGE_TYPE_2D_ARRAY,        tcu::UVec3(64u, 64u, 8u)),
1895                 ImageParams(IMAGE_TYPE_3D,                      tcu::UVec3(48u, 48u, 8u)),
1896                 ImageParams(IMAGE_TYPE_CUBE,            tcu::UVec3(64u, 64u, 1u)),
1897                 ImageParams(IMAGE_TYPE_CUBE_ARRAY,      tcu::UVec3(64u, 64u, 2u)),
1898                 ImageParams(IMAGE_TYPE_BUFFER,          tcu::UVec3(64u, 1u, 1u))
1899         };
1900
1901         const tcu::TextureFormat formats[] =
1902         {
1903                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1904                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1905                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1906                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1907                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)
1908         };
1909
1910         const struct
1911         {
1912                 ShaderReadType          type;
1913                 const char*                     name;
1914         } readTypes[] =
1915         {
1916                 {       ShaderReadType::NORMAL, "normal_read"   },
1917                 {       ShaderReadType::SPARSE, "sparse_read"   },
1918         };
1919
1920         const struct
1921         {
1922                 ImageBackingType        type;
1923                 const char*                     name;
1924         } backingTypes[] =
1925         {
1926                 {       ImageBackingType::NORMAL,       "normal_img"    },
1927                 {       ImageBackingType::SPARSE,       "sparse_img"    },
1928         };
1929
1930         for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
1931         {
1932                 const AtomicOperation operation = (AtomicOperation)operationI;
1933
1934                 de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
1935
1936                 for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
1937                 {
1938                         const ImageType  imageType = imageParamsArray[imageTypeNdx].m_imageType;
1939                         const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
1940
1941                         de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
1942
1943                         for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
1944                         {
1945                                 const bool                              useTransfer     = (useTransferIdx > 0);
1946                                 const string                    groupName       = (!useTransfer ? "no" : "") + string("transfer");
1947
1948                                 de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str(), ""));
1949
1950                                 for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
1951                                 {
1952                                         const auto& readType = readTypes[readTypeIdx];
1953
1954                                         de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name, ""));
1955
1956                                         for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
1957                                         {
1958                                                 const auto& backingType = backingTypes[backingTypeIdx];
1959
1960                                                 de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(new tcu::TestCaseGroup(testCtx, backingType.name, ""));
1961
1962                                                 for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
1963                                                 {
1964                                                         const TextureFormat&    format          = formats[formatNdx];
1965                                                         const std::string               formatName      = getShaderImageFormatQualifier(format);
1966
1967                                                         // Need SPIRV programs in vktImageAtomicSpirvShaders.cpp
1968                                                         if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
1969                                                         {
1970                                                                 continue;
1971                                                         }
1972
1973                                                         // Only 2D and 3D images may support sparse residency.
1974                                                         const auto vkImageType = mapImageType(imageType);
1975                                                         if (backingType.type == ImageBackingType::SPARSE && (vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D))
1976                                                                 continue;
1977
1978                                                         // Only ADD and EXCHANGE are supported on floating-point
1979                                                         if (format.type == tcu::TextureFormat::FLOAT)
1980                                                         {
1981                                                                 if (operation != ATOMIC_OPERATION_ADD && operation != ATOMIC_OPERATION_EXCHANGE)
1982                                                                 {
1983                                                                         continue;
1984                                                                 }
1985                                                         }
1986
1987                                                         if (readType.type == ShaderReadType::SPARSE)
1988                                                         {
1989                                                                 // When using transfer, shader reads will not be used, so avoid creating two identical cases.
1990                                                                 if (useTransfer)
1991                                                                         continue;
1992
1993                                                                 // Sparse reads are not supported for all types of images.
1994                                                                 if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY || imageType == IMAGE_TYPE_BUFFER)
1995                                                                         continue;
1996                                                         }
1997
1998                                                         //!< Atomic case checks the end result of the operations, and not the intermediate return values
1999                                                         const string caseEndResult = formatName + "_end_result";
2000                                                         backingTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2001
2002                                                         //!< Atomic case checks the return values of the atomic function and not the end result.
2003                                                         const string caseIntermValues = formatName + "_intermediate_values";
2004                                                         backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2005                                                 }
2006
2007                                                 readTypeGroup->addChild(backingTypeGroup.release());
2008                                         }
2009
2010                                         transferGroup->addChild(readTypeGroup.release());
2011                                 }
2012
2013                                 imageTypeGroup->addChild(transferGroup.release());
2014                         }
2015
2016                         operationGroup->addChild(imageTypeGroup.release());
2017                 }
2018
2019                 imageAtomicOperationsTests->addChild(operationGroup.release());
2020         }
2021
2022         return imageAtomicOperationsTests.release();
2023 }
2024
2025 } // image
2026 } // vkt