Fix missing dependency on sparse binds
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / image / vktImageAtomicOperationTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file  vktImageAtomicOperationTests.cpp
21  * \brief Image atomic operation tests
22  *//*--------------------------------------------------------------------*/
23
24 #include "vktImageAtomicOperationTests.hpp"
25 #include "vktImageAtomicSpirvShaders.hpp"
26
27 #include "deUniquePtr.hpp"
28 #include "deStringUtil.hpp"
29 #include "deSTLUtil.hpp"
30
31 #include "vktTestCaseUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkQueryUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vktImageTestsUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkRef.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkTypeUtil.hpp"
41 #include "vkCmdUtil.hpp"
42 #include "vkObjUtil.hpp"
43 #include "vkBufferWithMemory.hpp"
44
45 #include "tcuTextureUtil.hpp"
46 #include "tcuTexture.hpp"
47 #include "tcuVectorType.hpp"
48 #include "tcuStringTemplate.hpp"
49
50 namespace vkt
51 {
52 namespace image
53 {
54 namespace
55 {
56
57 using namespace vk;
58 using namespace std;
59 using de::toString;
60
61 using tcu::TextureFormat;
62 using tcu::IVec2;
63 using tcu::IVec3;
64 using tcu::UVec3;
65 using tcu::Vec4;
66 using tcu::IVec4;
67 using tcu::UVec4;
68 using tcu::CubeFace;
69 using tcu::Texture1D;
70 using tcu::Texture2D;
71 using tcu::Texture3D;
72 using tcu::Texture2DArray;
73 using tcu::TextureCube;
74 using tcu::PixelBufferAccess;
75 using tcu::ConstPixelBufferAccess;
76 using tcu::Vector;
77 using tcu::TestContext;
78
//! File-wide integral constants.
enum
{
	//! NOTE(review): going by the name, how many shader invocations target each pixel;
	//! the uses are outside this part of the file - confirm before changing the value.
	NUM_INVOCATIONS_PER_PIXEL = 5u
};
83
//! Atomic image operations handled by the helpers in this file.
//! Values are consecutive and start at zero; ATOMIC_OPERATION_LAST is not an
//! operation itself, it equals the number of real entries.
enum AtomicOperation
{
	ATOMIC_OPERATION_ADD				= 0,
	ATOMIC_OPERATION_SUB				= 1,
	ATOMIC_OPERATION_INC				= 2,
	ATOMIC_OPERATION_DEC				= 3,
	ATOMIC_OPERATION_MIN				= 4,
	ATOMIC_OPERATION_MAX				= 5,
	ATOMIC_OPERATION_AND				= 6,
	ATOMIC_OPERATION_OR					= 7,
	ATOMIC_OPERATION_XOR				= 8,
	ATOMIC_OPERATION_EXCHANGE			= 9,
	ATOMIC_OPERATION_COMPARE_EXCHANGE	= 10,

	ATOMIC_OPERATION_LAST				= 11
};
100
//! Selects how the result image is read back in the shader.
//! NOTE(review): SPARSE presumably selects the "readShaderResidency" variant built by
//! AddFillReadShader (which uses sparseImageLoadARB) - confirm against the callers.
enum class ShaderReadType
{
	NORMAL	= 0,
	SPARSE	= 1,
};
106
//! Selects the kind of memory backing used for the test image.
//! commonCheckSupport() requires the sparse binding / residency features when SPARSE is requested.
enum class ImageBackingType
{
	NORMAL	= 0,
	SPARSE	= 1,
};
112
113 static string getCoordStr (const ImageType              imageType,
114                                                    const std::string&   x,
115                                                    const std::string&   y,
116                                                    const std::string&   z)
117 {
118         switch (imageType)
119         {
120                 case IMAGE_TYPE_1D:
121                 case IMAGE_TYPE_BUFFER:
122                         return x;
123                 case IMAGE_TYPE_1D_ARRAY:
124                 case IMAGE_TYPE_2D:
125                         return string("ivec2(" + x + "," + y + ")");
126                 case IMAGE_TYPE_2D_ARRAY:
127                 case IMAGE_TYPE_3D:
128                 case IMAGE_TYPE_CUBE:
129                 case IMAGE_TYPE_CUBE_ARRAY:
130                         return string("ivec3(" + x + "," + y + "," + z + ")");
131                 default:
132                         DE_ASSERT(false);
133                         return "";
134         }
135 }
136
137 static string getComponentTypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
138 {
139         DE_ASSERT(intFormat || uintFormat || floatFormat);
140
141         const bool is64 = (componentWidth == 64);
142
143         if (intFormat)
144                 return (is64 ? "int64_t" : "int");
145         if (uintFormat)
146                 return (is64 ? "uint64_t" : "uint");
147         if (floatFormat)
148                 return (is64 ? "double" : "float");
149
150         return "";
151 }
152
153 static string getVec4TypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
154 {
155         DE_ASSERT(intFormat || uintFormat || floatFormat);
156
157         const bool is64 = (componentWidth == 64);
158
159         if (intFormat)
160                 return (is64 ? "i64vec4" : "ivec4");
161         if (uintFormat)
162                 return (is64 ? "u64vec4" : "uvec4");
163         if (floatFormat)
164                 return (is64 ? "f64vec4" : "vec4");
165
166         return "";
167 }
168
169 static string getAtomicFuncArgumentShaderStr (const AtomicOperation     op,
170                                                                                           const string&                 x,
171                                                                                           const string&                 y,
172                                                                                           const string&                 z,
173                                                                                           const IVec3&                  gridSize)
174 {
175         switch (op)
176         {
177                 case ATOMIC_OPERATION_ADD:
178                 case ATOMIC_OPERATION_AND:
179                 case ATOMIC_OPERATION_OR:
180                 case ATOMIC_OPERATION_XOR:
181                         return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
182                 case ATOMIC_OPERATION_MIN:
183                 case ATOMIC_OPERATION_MAX:
184                         // multiply by (1-2*(value % 2) to make half of the data negative
185                         // this will result in generating large numbers for uint formats
186                         return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
187                 case ATOMIC_OPERATION_EXCHANGE:
188                 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
189                         return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
190                 default:
191                         DE_ASSERT(false);
192                         return "";
193         }
194 }
195
196 static string getAtomicOperationCaseName (const AtomicOperation op)
197 {
198         switch (op)
199         {
200                 case ATOMIC_OPERATION_ADD:                              return string("add");
201                 case ATOMIC_OPERATION_SUB:                              return string("sub");
202                 case ATOMIC_OPERATION_INC:                              return string("inc");
203                 case ATOMIC_OPERATION_DEC:                              return string("dec");
204                 case ATOMIC_OPERATION_MIN:                              return string("min");
205                 case ATOMIC_OPERATION_MAX:                              return string("max");
206                 case ATOMIC_OPERATION_AND:                              return string("and");
207                 case ATOMIC_OPERATION_OR:                               return string("or");
208                 case ATOMIC_OPERATION_XOR:                              return string("xor");
209                 case ATOMIC_OPERATION_EXCHANGE:                 return string("exchange");
210                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("compare_exchange");
211                 default:
212                         DE_ASSERT(false);
213                         return "";
214         }
215 }
216
217 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
218 {
219         switch (op)
220         {
221                 case ATOMIC_OPERATION_ADD:                              return string("imageAtomicAdd");
222                 case ATOMIC_OPERATION_MIN:                              return string("imageAtomicMin");
223                 case ATOMIC_OPERATION_MAX:                              return string("imageAtomicMax");
224                 case ATOMIC_OPERATION_AND:                              return string("imageAtomicAnd");
225                 case ATOMIC_OPERATION_OR:                               return string("imageAtomicOr");
226                 case ATOMIC_OPERATION_XOR:                              return string("imageAtomicXor");
227                 case ATOMIC_OPERATION_EXCHANGE:                 return string("imageAtomicExchange");
228                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("imageAtomicCompSwap");
229                 default:
230                         DE_ASSERT(false);
231                         return "";
232         }
233 }
234
235 template <typename T>
236 T getOperationInitialValue (const AtomicOperation op)
237 {
238         switch (op)
239         {
240                 // \note 18 is just an arbitrary small nonzero value.
241                 case ATOMIC_OPERATION_ADD:                              return 18;
242                 case ATOMIC_OPERATION_INC:                              return 18;
243                 case ATOMIC_OPERATION_SUB:                              return (1 << 24) - 1;
244                 case ATOMIC_OPERATION_DEC:                              return (1 << 24) - 1;
245                 case ATOMIC_OPERATION_MIN:                              return (1 << 15) - 1;
246                 case ATOMIC_OPERATION_MAX:                              return 18;
247                 case ATOMIC_OPERATION_AND:                              return (1 << 15) - 1;
248                 case ATOMIC_OPERATION_OR:                               return 18;
249                 case ATOMIC_OPERATION_XOR:                              return 18;
250                 case ATOMIC_OPERATION_EXCHANGE:                 return 18;
251                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 18;
252                 default:
253                         DE_ASSERT(false);
254                         return 0xFFFFFFFF;
255         }
256 }
257
258 template <>
259 deInt64 getOperationInitialValue<deInt64>(const AtomicOperation op)
260 {
261         switch (op)
262         {
263                 // \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
264                 case ATOMIC_OPERATION_ADD:                              return 0x000000BEFFFFFF18;
265                 case ATOMIC_OPERATION_INC:                              return 0x000000BEFFFFFF18;
266                 case ATOMIC_OPERATION_SUB:                              return (1ull << 56) - 1;
267                 case ATOMIC_OPERATION_DEC:                              return (1ull << 56) - 1;
268                 case ATOMIC_OPERATION_MIN:                              return (1ull << 47) - 1;
269                 case ATOMIC_OPERATION_MAX:                              return 0x000000BEFFFFFF18;
270                 case ATOMIC_OPERATION_AND:                              return (1ull << 47) - 1;
271                 case ATOMIC_OPERATION_OR:                               return 0x000000BEFFFFFF18;
272                 case ATOMIC_OPERATION_XOR:                              return 0x000000BEFFFFFF18;
273                 case ATOMIC_OPERATION_EXCHANGE:                 return 0x000000BEFFFFFF18;
274                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 0x000000BEFFFFFF18;
275                 default:
276                         DE_ASSERT(false);
277                         return 0xFFFFFFFFFFFFFFFF;
278         }
279 }
280
281 template <>
282 deUint64 getOperationInitialValue<deUint64>(const AtomicOperation op)
283 {
284         return (deUint64)getOperationInitialValue<deInt64>(op);
285 }
286
287
288 template <typename T>
289 static T getAtomicFuncArgument (const AtomicOperation   op,
290                                                                 const IVec3&                    invocationID,
291                                                                 const IVec3&                    gridSize)
292 {
293         const T x = static_cast<T>(invocationID.x());
294         const T y = static_cast<T>(invocationID.y());
295         const T z = static_cast<T>(invocationID.z());
296
297         switch (op)
298         {
299                 // \note Fall-throughs.
300                 case ATOMIC_OPERATION_ADD:
301                 case ATOMIC_OPERATION_SUB:
302                 case ATOMIC_OPERATION_AND:
303                 case ATOMIC_OPERATION_OR:
304                 case ATOMIC_OPERATION_XOR:
305                         return x*x + y*y + z*z;
306                 case ATOMIC_OPERATION_INC:
307                 case ATOMIC_OPERATION_DEC:
308                         return 1;
309                 case ATOMIC_OPERATION_MIN:
310                 case ATOMIC_OPERATION_MAX:
311                         // multiply half of the data by -1
312                         return (1-2*(x % 2))*(x*x + y*y + z*z);
313                 case ATOMIC_OPERATION_EXCHANGE:
314                 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
315                         return (z*static_cast<T>(gridSize.x()) + x)*static_cast<T>(gridSize.y()) + y;
316                 default:
317                         DE_ASSERT(false);
318                         return -1;
319         }
320 }
321
322 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
323 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
324 {
325         return  op == ATOMIC_OPERATION_ADD ||
326                         op == ATOMIC_OPERATION_SUB ||
327                         op == ATOMIC_OPERATION_INC ||
328                         op == ATOMIC_OPERATION_DEC ||
329                         op == ATOMIC_OPERATION_MIN ||
330                         op == ATOMIC_OPERATION_MAX ||
331                         op == ATOMIC_OPERATION_AND ||
332                         op == ATOMIC_OPERATION_OR ||
333                         op == ATOMIC_OPERATION_XOR;
334 }
335
336 //! Checks if the operation needs an SPIR-V shader.
337 static bool isSpirvAtomicOperation (const AtomicOperation op)
338 {
339         return  op == ATOMIC_OPERATION_SUB ||
340                         op == ATOMIC_OPERATION_INC ||
341                         op == ATOMIC_OPERATION_DEC;
342 }
343
344 //! Returns the SPIR-V assembler name of the given operation.
345 static std::string getSpirvAtomicOpName (const AtomicOperation op)
346 {
347         switch (op)
348         {
349         case ATOMIC_OPERATION_SUB:      return "OpAtomicISub";
350         case ATOMIC_OPERATION_INC:      return "OpAtomicIIncrement";
351         case ATOMIC_OPERATION_DEC:      return "OpAtomicIDecrement";
352         default:                                        break;
353         }
354
355         DE_ASSERT(false);
356         return "";
357 }
358
359 //! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
360 static bool isSpirvAtomicNoLastArgOp (const AtomicOperation op)
361 {
362         switch (op)
363         {
364         case ATOMIC_OPERATION_SUB:      return false;
365         case ATOMIC_OPERATION_INC:      // fallthrough
366         case ATOMIC_OPERATION_DEC:      return true;
367         default:                                        break;
368         }
369
370         DE_ASSERT(false);
371         return false;
372 }
373
374 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
375 template <typename T>
376 static T computeBinaryAtomicOperationResult (const AtomicOperation op, const T a, const T b)
377 {
378         switch (op)
379         {
380                 case ATOMIC_OPERATION_INC:                              // fallthrough.
381                 case ATOMIC_OPERATION_ADD:                              return a + b;
382                 case ATOMIC_OPERATION_DEC:                              // fallthrough.
383                 case ATOMIC_OPERATION_SUB:                              return a - b;
384                 case ATOMIC_OPERATION_MIN:                              return de::min(a, b);
385                 case ATOMIC_OPERATION_MAX:                              return de::max(a, b);
386                 case ATOMIC_OPERATION_AND:                              return a & b;
387                 case ATOMIC_OPERATION_OR:                               return a | b;
388                 case ATOMIC_OPERATION_XOR:                              return a ^ b;
389                 case ATOMIC_OPERATION_EXCHANGE:                 return b;
390                 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
391                 default:
392                         DE_ASSERT(false);
393                         return -1;
394         }
395 }
396
397 VkImageUsageFlags getUsageFlags (bool useTransfer)
398 {
399         VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;
400
401         if (useTransfer)
402                 usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
403
404         return usageFlags;
405 }
406
407 void AddFillReadShader (SourceCollections&                      sourceCollections,
408                                                 const ImageType&                        imageType,
409                                                 const tcu::TextureFormat&       format,
410                                                 const string&                           componentType,
411                                                 const string&                           vec4Type)
412 {
413         const string    imageInCoord                    = getCoordStr(imageType, "gx", "gy", "gz");
414         const string    shaderImageFormatStr    = getShaderImageFormatQualifier(format);
415         const string    shaderImageTypeStr              = getShaderImageType(format, imageType);
416         const auto              componentWidth                  = getFormatComponentWidth(mapTextureFormat(format), 0u);
417         const string    extensions                              = ((componentWidth == 64u)
418                                                                                         ?       "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
419                                                                                                 "#extension GL_EXT_shader_image_int64 : require\n"
420                                                                                         :       "");
421
422
423         const string fillShader =       "#version 450\n"
424                                                                 + extensions +
425                                                                 "precision highp " + shaderImageTypeStr + ";\n"
426                                                                 "\n"
427                                                                 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
428                                                                 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
429                                                                 "\n"
430                                                                 "layout(std430, binding = 1) buffer inputBuffer\n"
431                                                                 "{\n"
432                                                                 "       "+ componentType + " data[];\n"
433                                                                 "} inBuffer;\n"
434                                                                 "\n"
435                                                                 "void main(void)\n"
436                                                                 "{\n"
437                                                                 "       int gx = int(gl_GlobalInvocationID.x);\n"
438                                                                 "       int gy = int(gl_GlobalInvocationID.y);\n"
439                                                                 "       int gz = int(gl_GlobalInvocationID.z);\n"
440                                                                 "       uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
441                                                                 "       imageStore(u_resultImage, " + imageInCoord + ", " + vec4Type + "(inBuffer.data[index]));\n"
442                                                                 "}\n";
443
444         const string readShader =       "#version 450\n"
445                                                                 + extensions +
446                                                                 "precision highp " + shaderImageTypeStr + ";\n"
447                                                                 "\n"
448                                                                 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
449                                                                 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
450                                                                 "\n"
451                                                                 "layout(std430, binding = 1) buffer outputBuffer\n"
452                                                                 "{\n"
453                                                                 "       " + componentType + " data[];\n"
454                                                                 "} outBuffer;\n"
455                                                                 "\n"
456                                                                 "void main(void)\n"
457                                                                 "{\n"
458                                                                 "       int gx = int(gl_GlobalInvocationID.x);\n"
459                                                                 "       int gy = int(gl_GlobalInvocationID.y);\n"
460                                                                 "       int gz = int(gl_GlobalInvocationID.z);\n"
461                                                                 "       uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
462                                                                 "       outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
463                                                                 "}\n";
464
465
466         if ((imageType != IMAGE_TYPE_1D) &&
467                 (imageType != IMAGE_TYPE_1D_ARRAY) &&
468                 (imageType != IMAGE_TYPE_BUFFER))
469         {
470                 const string readShaderResidency  = "#version 450\n"
471                                                                                         "#extension GL_ARB_sparse_texture2 : require\n"
472                                                                                         + extensions +
473                                                                                         "precision highp " + shaderImageTypeStr + ";\n"
474                                                                                         "\n"
475                                                                                         "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
476                                                                                         "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
477                                                                                         "\n"
478                                                                                         "layout(std430, binding = 1) buffer outputBuffer\n"
479                                                                                         "{\n"
480                                                                                         "       " + componentType + " data[];\n"
481                                                                                         "} outBuffer;\n"
482                                                                                         "\n"
483                                                                                         "void main(void)\n"
484                                                                                         "{\n"
485                                                                                         "       int gx = int(gl_GlobalInvocationID.x);\n"
486                                                                                         "       int gy = int(gl_GlobalInvocationID.y);\n"
487                                                                                         "       int gz = int(gl_GlobalInvocationID.z);\n"
488                                                                                         "       uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
489                                                                                         "       outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
490                                                                                         "       " + vec4Type + " sparseValue;\n"
491                                                                                         "       sparseImageLoadARB(u_resultImage, " + imageInCoord + ", sparseValue);\n"
492                                                                                         "       if (outBuffer.data[index] != sparseValue.x)\n"
493                                                                                         "               outBuffer.data[index] = " + vec4Type + "(1234).x;\n"
494                                                                                         "}\n";
495
496                 sourceCollections.glslSources.add("readShaderResidency") << glu::ComputeSource(readShaderResidency.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
497         }
498
499         sourceCollections.glslSources.add("fillShader") << glu::ComputeSource(fillShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
500         sourceCollections.glslSources.add("readShader") << glu::ComputeSource(readShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
501 }
502
503 //! Prepare the initial data for the image
504 static void initDataForImage (const VkDevice                    device,
505                                                           const DeviceInterface&        deviceInterface,
506                                                           const TextureFormat&          format,
507                                                           const AtomicOperation         operation,
508                                                           const tcu::UVec3&                     gridSize,
509                                                           BufferWithMemory&                     buffer)
510 {
511         Allocation&                             bufferAllocation        = buffer.getAllocation();
512         const VkFormat                  imageFormat                     = mapTextureFormat(format);
513         tcu::PixelBufferAccess  pixelBuffer                     (format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());
514
515         if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
516         {
517                 const deInt64 initialValue(getOperationInitialValue<deInt64>(operation));
518
519                 for (deUint32 z = 0; z < gridSize.z(); z++)
520                 for (deUint32 y = 0; y < gridSize.y(); y++)
521                 for (deUint32 x = 0; x < gridSize.x(); x++)
522                 {
523                         *((deInt64*)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
524                 }
525         }
526         else
527         {
528                 const tcu::IVec4 initialValue(getOperationInitialValue<deInt32>(operation));
529
530                 for (deUint32 z = 0; z < gridSize.z(); z++)
531                 for (deUint32 y = 0; y < gridSize.y(); y++)
532                 for (deUint32 x = 0; x < gridSize.x(); x++)
533                 {
534                         pixelBuffer.setPixel(initialValue, x, y, z);
535                 }
536         }
537
538         flushAlloc(deviceInterface, device, bufferAllocation);
539 }
540
541 void commonCheckSupport (Context& context, const tcu::TextureFormat& tcuFormat, VkImageTiling tiling, ImageType imageType, const tcu::UVec3& imageSize, AtomicOperation operation, bool useTransfer, ShaderReadType readType, ImageBackingType backingType)
542 {
543         const VkFormat                          format                          = mapTextureFormat(tcuFormat);
544         const VkImageType                       vkImgType                       = mapImageType(imageType);
545         const VkFormatFeatureFlags      texelBufferSupport      = (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);
546
547         const auto& vki                         = context.getInstanceInterface();
548         const auto      physicalDevice  = context.getPhysicalDevice();
549         const auto usageFlags = getUsageFlags(useTransfer);
550
551         VkImageFormatProperties vkImageFormatProperties;
552         const auto result = vki.getPhysicalDeviceImageFormatProperties(physicalDevice, format, vkImgType, tiling, usageFlags, 0, &vkImageFormatProperties);
553         if (result != VK_SUCCESS) {
554                 if (result == VK_ERROR_FORMAT_NOT_SUPPORTED)
555                         TCU_THROW(NotSupportedError, "Format unsupported for tiling");
556                 else
557                         TCU_FAIL("vkGetPhysicalDeviceImageFormatProperties returned unexpected error");
558         }
559
560         if (vkImageFormatProperties.maxArrayLayers < (uint32_t)getNumLayers(imageType, imageSize)) {
561                 TCU_THROW(NotSupportedError, "This format and tiling combination does not support this number of aray layers");
562         }
563
564         const VkFormatProperties        formatProperties        = getPhysicalDeviceFormatProperties(context.getInstanceInterface(),
565                                                                                                                                                                                 context.getPhysicalDevice(), format);
566         if ((imageType == IMAGE_TYPE_BUFFER) &&
567                 ((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
568                 TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");
569
570         const VkFormatFeatureFlags requiredFeaturesLinear = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
571         if (tiling == vk::VK_IMAGE_TILING_LINEAR &&
572                         ((formatProperties.linearTilingFeatures & requiredFeaturesLinear) != requiredFeaturesLinear)
573         ) {
574                 TCU_THROW(NotSupportedError, "Format doesn't support atomic storage with linear tiling");
575         }
576
577         if (imageType == IMAGE_TYPE_CUBE_ARRAY)
578                 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);
579
580 #ifndef CTS_USES_VULKANSC
581         if (backingType == ImageBackingType::SPARSE)
582         {
583                 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
584
585                 switch (vkImgType)
586                 {
587                 case VK_IMAGE_TYPE_2D:  context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D); break;
588                 case VK_IMAGE_TYPE_3D:  context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D); break;
589                 default:                                DE_ASSERT(false); break;
590                 }
591
592                 if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format, vkImgType, VK_SAMPLE_COUNT_1_BIT, usageFlags, tiling))
593                         TCU_THROW(NotSupportedError, "Format does not support sparse images");
594         }
595 #endif // CTS_USES_VULKANSC
596
597         if (isFloatFormat(format))
598         {
599                 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
600
601                 const VkFormatFeatureFlags      requiredFeatures        = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
602                 const auto&                                     atomicFloatFeatures     = context.getShaderAtomicFloatFeaturesEXT();
603
604                 if (!atomicFloatFeatures.shaderImageFloat32Atomics)
605                         TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");
606
607                 if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
608                         TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");
609
610                 if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
611                 {
612                         context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
613 #ifndef CTS_USES_VULKANSC
614                         if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
615                         {
616                                 TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
617                         }
618 #endif // CTS_USES_VULKANSC
619                 }
620
621                 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
622                         TCU_FAIL("Required format feature bits not supported");
623
624                 if (backingType == ImageBackingType::SPARSE)
625                 {
626                         if (!atomicFloatFeatures.sparseImageFloat32Atomics)
627                                 TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");
628
629                         if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
630                                 TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
631                 }
632
633         }
634         else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
635         {
636                 context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");
637
638                 const VkFormatFeatureFlags      requiredFeatures        = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
639                 const auto&                                     atomicInt64Features     = context.getShaderImageAtomicInt64FeaturesEXT();
640
641                 if (!atomicInt64Features.shaderImageInt64Atomics)
642                         TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");
643
644                 if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
645                         TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");
646
647                 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
648                         TCU_FAIL("Mandatory format features not supported");
649         }
650
651         if (useTransfer)
652         {
653                 const VkFormatFeatureFlags transferFeatures = (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
654                 if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
655                         TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
656         }
657
658         if (readType == ShaderReadType::SPARSE)
659         {
660                 DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
661                 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
662         }
663 }
664
665 class BinaryAtomicEndResultCase : public vkt::TestCase
666 {
667 public:
668                                                                 BinaryAtomicEndResultCase       (tcu::TestContext&                      testCtx,
669                                                                                                                          const string&                          name,
670                                                                                                                          const string&                          description,
671                                                                                                                          const ImageType                        imageType,
672                                                                                                                          const tcu::UVec3&                      imageSize,
673                                                                                                                          const tcu::TextureFormat&      format,
674                                                                                                                          const VkImageTiling            tiling,
675                                                                                                                          const AtomicOperation          operation,
676                                                                                                                          const bool                                     useTransfer,
677                                                                                                                          const ShaderReadType           shaderReadType,
678                                                                                                                          const ImageBackingType         backingType,
679                                                                                                                          const glu::GLSLVersion         glslVersion);
680
681         void                                            initPrograms                            (SourceCollections&                     sourceCollections) const;
682         TestInstance*                           createInstance                          (Context&                                       context) const;
683         virtual void                            checkSupport                            (Context&                                       context) const;
684
685 private:
686         const ImageType                         m_imageType;
687         const tcu::UVec3                        m_imageSize;
688         const tcu::TextureFormat        m_format;
689         const VkImageTiling                     m_tiling;
690         const AtomicOperation           m_operation;
691         const bool                                      m_useTransfer;
692         const ShaderReadType            m_readType;
693         const ImageBackingType          m_backingType;
694         const glu::GLSLVersion          m_glslVersion;
695 };
696
697 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext&                 testCtx,
698                                                                                                           const string&                         name,
699                                                                                                           const string&                         description,
700                                                                                                           const ImageType                       imageType,
701                                                                                                           const tcu::UVec3&                     imageSize,
702                                                                                                           const tcu::TextureFormat&     format,
703                                                                                                           const VkImageTiling           tiling,
704                                                                                                           const AtomicOperation         operation,
705                                                                                                           const bool                            useTransfer,
706                                                                                                           const ShaderReadType          shaderReadType,
707                                                                                                           const ImageBackingType        backingType,
708                                                                                                           const glu::GLSLVersion        glslVersion)
709         : TestCase              (testCtx, name, description)
710         , m_imageType   (imageType)
711         , m_imageSize   (imageSize)
712         , m_format              (format)
713         , m_tiling              (tiling)
714         , m_operation   (operation)
715         , m_useTransfer (useTransfer)
716         , m_readType    (shaderReadType)
717         , m_backingType (backingType)
718         , m_glslVersion (glslVersion)
719 {
720 }
721
722 void BinaryAtomicEndResultCase::checkSupport (Context& context) const
723 {
724         commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType, m_backingType);
725 }
726
727 void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
728 {
729         const VkFormat  imageFormat             = mapTextureFormat(m_format);
730         const deUint32  componentWidth  = getFormatComponentWidth(imageFormat, 0);
731         const bool              intFormat               = isIntFormat(imageFormat);
732         const bool              uintFormat              = isUintFormat(imageFormat);
733         const bool              floatFormat             = isFloatFormat(imageFormat);
734         const string    type                    = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
735         const string    vec4Type                = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
736
737         AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
738
739         if (isSpirvAtomicOperation(m_operation))
740         {
741                 const CaseVariant                                       caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
742                 const tcu::StringTemplate                       shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
743                 std::map<std::string, std::string>      specializations;
744
745                 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
746                 if (isSpirvAtomicNoLastArgOp(m_operation))
747                         specializations["LASTARG"] = "";
748
749                 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
750         }
751         else
752         {
753                 const string    versionDecl                             = glu::getGLSLVersionDeclaration(m_glslVersion);
754
755                 const UVec3             gridSize                                = getShaderGridSize(m_imageType, m_imageSize);
756                 const string    atomicCoord                             = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
757
758                 const string    atomicArgExpr                   = type + getAtomicFuncArgumentShaderStr(m_operation,
759                                                                                                                                                                                 "gx", "gy", "gz",
760                                                                                                                                                                                 IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
761
762                 const string    compareExchangeStr              = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
763                                                                                                 (componentWidth == 64 ?", 820338753304": ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "")
764                                                                                                 : "";
765                 const string    atomicInvocation                = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
766                 const string    shaderImageFormatStr    = getShaderImageFormatQualifier(m_format);
767                 const string    shaderImageTypeStr              = getShaderImageType(m_format, m_imageType);
768                 const string    extensions                              = "#extension GL_EXT_shader_atomic_float : enable\n"
769                                                                                                   "#extension GL_EXT_shader_atomic_float2 : enable\n"
770                                                                                                   "#extension GL_KHR_memory_scope_semantics : enable";
771
772                 string source = versionDecl + "\n" + extensions + "\n";
773
774                 if (64 == componentWidth)
775                 {
776                         source +=       "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
777                                                 "#extension GL_EXT_shader_image_int64 : require\n";
778                 }
779
780                 source +=       "precision highp " + shaderImageTypeStr + ";\n"
781                                         "\n"
782                                         "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
783                                         "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
784                                         "\n"
785                                         "void main (void)\n"
786                                         "{\n"
787                                         "       int gx = int(gl_GlobalInvocationID.x);\n"
788                                         "       int gy = int(gl_GlobalInvocationID.y);\n"
789                                         "       int gz = int(gl_GlobalInvocationID.z);\n"
790                                         "       " + atomicInvocation + ";\n"
791                                         "}\n";
792
793                 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
794         }
795 }
796
797 class BinaryAtomicIntermValuesCase : public vkt::TestCase
798 {
799 public:
800                                                                 BinaryAtomicIntermValuesCase    (tcu::TestContext&                      testCtx,
801                                                                                                                                  const string&                          name,
802                                                                                                                                  const string&                          description,
803                                                                                                                                  const ImageType                        imageType,
804                                                                                                                                  const tcu::UVec3&                      imageSize,
805                                                                                                                                  const tcu::TextureFormat&      format,
806                                                                                                                                  const VkImageTiling            tiling,
807                                                                                                                                  const AtomicOperation          operation,
808                                                                                                                                  const bool                                     useTransfer,
809                                                                                                                                  const ShaderReadType           shaderReadType,
810                                                                                                                                  const ImageBackingType         backingType,
811                                                                                                                                  const glu::GLSLVersion         glslVersion);
812
813         void                                            initPrograms                                    (SourceCollections&                     sourceCollections) const;
814         TestInstance*                           createInstance                                  (Context&                                       context) const;
815         virtual void                            checkSupport                                    (Context&                                       context) const;
816
817 private:
818         const ImageType                         m_imageType;
819         const tcu::UVec3                        m_imageSize;
820         const tcu::TextureFormat        m_format;
821         const VkImageTiling                     m_tiling;
822         const AtomicOperation           m_operation;
823         const bool                                      m_useTransfer;
824         const ShaderReadType            m_readType;
825         const ImageBackingType          m_backingType;
826         const glu::GLSLVersion          m_glslVersion;
827 };
828
829 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext&                        testCtx,
830                                                                                                                         const string&                   name,
831                                                                                                                         const string&                   description,
832                                                                                                                         const ImageType                 imageType,
833                                                                                                                         const tcu::UVec3&               imageSize,
834                                                                                                                         const TextureFormat&    format,
835                                                                                                                         const VkImageTiling             tiling,
836                                                                                                                         const AtomicOperation   operation,
837                                                                                                                         const bool                              useTransfer,
838                                                                                                                         const ShaderReadType    shaderReadType,
839                                                                                                                         const ImageBackingType  backingType,
840                                                                                                                         const glu::GLSLVersion  glslVersion)
841         : TestCase              (testCtx, name, description)
842         , m_imageType   (imageType)
843         , m_imageSize   (imageSize)
844         , m_format              (format)
845         , m_tiling              (tiling)
846         , m_operation   (operation)
847         , m_useTransfer (useTransfer)
848         , m_readType    (shaderReadType)
849         , m_backingType (backingType)
850         , m_glslVersion (glslVersion)
851 {
852 }
853
854 void BinaryAtomicIntermValuesCase::checkSupport (Context& context) const
855 {
856         commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType, m_backingType);
857 }
858
859 void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
860 {
861         const VkFormat  imageFormat             = mapTextureFormat(m_format);
862         const deUint32  componentWidth  = getFormatComponentWidth(imageFormat, 0);
863         const bool              intFormat               = isIntFormat(imageFormat);
864         const bool              uintFormat              = isUintFormat(imageFormat);
865         const bool              floatFormat             = isFloatFormat(imageFormat);
866         const string    type                    = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
867         const string    vec4Type                = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
868
869         AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
870
871         if (isSpirvAtomicOperation(m_operation))
872         {
873                 const CaseVariant                                       caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
874                 const tcu::StringTemplate                       shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
875                 std::map<std::string, std::string>      specializations;
876
877                 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
878                 if (isSpirvAtomicNoLastArgOp(m_operation))
879                         specializations["LASTARG"] = "";
880
881                 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
882         }
883         else
884         {
885                 const string    versionDecl                             = glu::getGLSLVersionDeclaration(m_glslVersion);
886                 const UVec3             gridSize                                = getShaderGridSize(m_imageType, m_imageSize);
887                 const string    atomicCoord                             = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
888                 const string    invocationCoord                 = getCoordStr(m_imageType, "gx", "gy", "gz");
889                 const string    atomicArgExpr                   = type + getAtomicFuncArgumentShaderStr(m_operation,
890                                                                                                                                                                                 "gx", "gy", "gz",
891                                                                                                                                                                                 IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
892
893                 const string    compareExchangeStr              = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
894                                                                                                   (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "") :
895                                                                                                   "";
896                 const string    atomicInvocation                = getAtomicOperationShaderFuncName(m_operation) +
897                                                                                                 "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
898                 const string    shaderImageFormatStr    = getShaderImageFormatQualifier(m_format);
899                 const string    shaderImageTypeStr              = getShaderImageType(m_format, m_imageType);
900                 const string    extensions                              = "#extension GL_EXT_shader_atomic_float : enable\n"
901                                                                                                   "#extension GL_EXT_shader_atomic_float2 : enable\n"
902                                                                                                   "#extension GL_KHR_memory_scope_semantics : enable";
903
904                 string source = versionDecl + "\n" + extensions + "\n"
905                                                 "\n";
906
907                 if (64 == componentWidth)
908                 {
909                         source +=       "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
910                                                 "#extension GL_EXT_shader_image_int64 : require\n";
911                 }
912
913                         source +=       "precision highp " + shaderImageTypeStr + "; \n"
914                                                 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
915                                                 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
916                                                 "layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
917                                                 "\n"
918                                                 "void main (void)\n"
919                                                 "{\n"
920                                                 "       int gx = int(gl_GlobalInvocationID.x);\n"
921                                                 "       int gy = int(gl_GlobalInvocationID.y);\n"
922                                                 "       int gz = int(gl_GlobalInvocationID.z);\n"
923                                                 "       imageStore(u_intermValuesImage, " + invocationCoord + ", " + vec4Type + "(" + atomicInvocation + "));\n"
924                                                 "}\n";
925
926                 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
927         }
928 }
929
930 class BinaryAtomicInstanceBase : public vkt::TestInstance
931 {
932 public:
933
934                                                                 BinaryAtomicInstanceBase (Context&                                              context,
935                                                                                                                   const string&                                 name,
936                                                                                                                   const ImageType                               imageType,
937                                                                                                                   const tcu::UVec3&                             imageSize,
938                                                                                                                   const TextureFormat&                  format,
939                                                                                                                   const VkImageTiling                   tiling,
940                                                                                                                   const AtomicOperation                 operation,
941                                                                                                                   const bool                                    useTransfer,
942                                                                                                                   const ShaderReadType                  shaderReadType,
943                                                                                                                   const ImageBackingType                backingType);
944
945         tcu::TestStatus                         iterate                                  (void);
946
947         virtual deUint32                        getOutputBufferSize              (void) const = 0;
948
949         virtual void                            prepareResources                 (const bool                                    useTransfer) = 0;
950         virtual void                            prepareDescriptors               (const bool                                    isTexelBuffer) = 0;
951
952         virtual void                            commandsBeforeCompute    (const VkCommandBuffer                 cmdBuffer) const = 0;
953         virtual void                            commandsAfterCompute     (const VkCommandBuffer                 cmdBuffer,
954                                                                                                                   const VkPipeline                              pipeline,
955                                                                                                                   const VkPipelineLayout                pipelineLayout,
956                                                                                                                    const VkDescriptorSet                descriptorSet,
957                                                                                                                   const VkDeviceSize&                   range,
958                                                                                                                   const bool                                    useTransfer) = 0;
959
960         virtual bool                            verifyResult                     (Allocation&                                   outputBufferAllocation,
961                                                                                                                   const bool                                    is64Bit) const = 0;
962
963 protected:
964
965         void                                            shaderFillImage                  (const VkCommandBuffer                 cmdBuffer,
966                                                                                                                   const VkBuffer&                               buffer,
967                                                                                                                   const VkPipeline                              pipeline,
968                                                                                                                   const VkPipelineLayout                pipelineLayout,
969                                                                                                                   const VkDescriptorSet                 descriptorSet,
970                                                                                                                   const VkDeviceSize&                   range,
971                                                                                                                   const tcu::UVec3&                             gridSize);
972
973         void                                            createImageAndView              (VkFormat                                               imageFormat,
974                                                                                                                  const tcu::UVec3&                              imageExent,
975                                                                                                                  bool                                                   useTransfer,
976                                                                                                                  de::MovePtr<Image>&                    imagePtr,
977                                                                                                                  Move<VkImageView>&                             imageViewPtr);
978
979         void                                            createImageResources    (const VkFormat&                                imageFormat,
980                                                                                                                  const bool                                             useTransfer);
981
982         const string                                    m_name;
983         const ImageType                                 m_imageType;
984         const tcu::UVec3                                m_imageSize;
985         const TextureFormat                             m_format;
986         const VkImageTiling                             m_tiling;
987         const AtomicOperation                   m_operation;
988         const bool                                              m_useTransfer;
989         const ShaderReadType                    m_readType;
990         const ImageBackingType                  m_backingType;
991
992         de::MovePtr<BufferWithMemory>   m_inputBuffer;
993         de::MovePtr<BufferWithMemory>   m_outputBuffer;
994         Move<VkBufferView>                              m_descResultBufferView;
995         Move<VkBufferView>                              m_descIntermResultsBufferView;
996         Move<VkDescriptorPool>                  m_descriptorPool;
997         Move<VkDescriptorSetLayout>             m_descriptorSetLayout;
998         Move<VkDescriptorSet>                   m_descriptorSet;
999
1000         Move<VkDescriptorSetLayout>             m_descriptorSetLayoutNoTransfer;
1001         Move<VkDescriptorPool>                  m_descriptorPoolNoTransfer;
1002
1003         de::MovePtr<Image>                              m_resultImage;
1004         Move<VkImageView>                               m_resultImageView;
1005
1006         std::vector<VkSemaphore>                m_waitSemaphores;
1007 };
1008
1009 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context&                            context,
1010                                                                                                         const string&                   name,
1011                                                                                                         const ImageType                 imageType,
1012                                                                                                         const tcu::UVec3&               imageSize,
1013                                                                                                         const TextureFormat&    format,
1014                                                                                                         const VkImageTiling             tiling,
1015                                                                                                         const AtomicOperation   operation,
1016                                                                                                         const bool                              useTransfer,
1017                                                                                                         const ShaderReadType    shaderReadType,
1018                                                                                                         const ImageBackingType  backingType)
1019         : vkt::TestInstance     (context)
1020         , m_name                        (name)
1021         , m_imageType           (imageType)
1022         , m_imageSize           (imageSize)
1023         , m_format                      (format)
1024         , m_tiling                      (tiling)
1025         , m_operation           (operation)
1026         , m_useTransfer         (useTransfer)
1027         , m_readType            (shaderReadType)
1028         , m_backingType         (backingType)
1029 {
1030 }
1031
1032 tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
1033 {
1034         const VkDevice                  device                          = m_context.getDevice();
1035         const DeviceInterface&  deviceInterface         = m_context.getDeviceInterface();
1036         const VkQueue                   queue                           = m_context.getUniversalQueue();
1037         const deUint32                  queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1038         Allocator&                              allocator                       = m_context.getDefaultAllocator();
1039         const VkDeviceSize              imageSizeInBytes        = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1040         const VkDeviceSize              outBuffSizeInBytes      = getOutputBufferSize();
1041         const VkFormat                  imageFormat                     = mapTextureFormat(m_format);
1042         const bool                              isTexelBuffer           = (m_imageType == IMAGE_TYPE_BUFFER);
1043
1044         if (!isTexelBuffer)
1045         {
1046                 createImageResources(imageFormat, m_useTransfer);
1047         }
1048
1049         tcu::UVec3                              gridSize                        = getShaderGridSize(m_imageType, m_imageSize);
1050
1051         //Prepare the buffer with the initial data for the image
1052         m_inputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(deviceInterface,
1053                                                                                                         device,
1054                                                                                                         allocator,
1055                                                                                                         makeBufferCreateInfo(imageSizeInBytes,
1056                                                                                                                                                  VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
1057                                                                                                                                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1058                                                                                                                                                  (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
1059                                                                                                         MemoryRequirement::HostVisible));
1060
1061         // Fill in buffer with initial data used for image.
1062         initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);
1063
1064         // Create a buffer to store shader output copied from result image
1065         m_outputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(deviceInterface,
1066                                                                                                         device,
1067                                                                                                         allocator,
1068                                                                                                         makeBufferCreateInfo(outBuffSizeInBytes,
1069                                                                                                                                                  VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1070                                                                                                                                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1071                                                                                                                                                  (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
1072                                                                                                         MemoryRequirement::HostVisible));
1073
1074         if (!isTexelBuffer)
1075         {
1076                 prepareResources(m_useTransfer);
1077         }
1078
1079         prepareDescriptors(isTexelBuffer);
1080
1081         Move<VkDescriptorSet>   descriptorSetFillImage;
1082         Move<VkShaderModule>    shaderModuleFillImage;
1083         Move<VkPipelineLayout>  pipelineLayoutFillImage;
1084         Move<VkPipeline>                pipelineFillImage;
1085
1086         Move<VkDescriptorSet>   descriptorSetReadImage;
1087         Move<VkShaderModule>    shaderModuleReadImage;
1088         Move<VkPipelineLayout>  pipelineLayoutReadImage;
1089         Move<VkPipeline>                pipelineReadImage;
1090
1091         if (!m_useTransfer)
1092         {
1093                 m_descriptorSetLayoutNoTransfer =
1094                         DescriptorSetLayoutBuilder()
1095                         .addSingleBinding((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), VK_SHADER_STAGE_COMPUTE_BIT)
1096                         .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1097                         .build(deviceInterface, device);
1098
1099                 m_descriptorPoolNoTransfer =
1100                         DescriptorPoolBuilder()
1101                         .addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), 2)
1102                         .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
1103                         .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);
1104
1105                 descriptorSetFillImage = makeDescriptorSet(deviceInterface,
1106                         device,
1107                         *m_descriptorPoolNoTransfer,
1108                         *m_descriptorSetLayoutNoTransfer);
1109
1110                 descriptorSetReadImage = makeDescriptorSet(deviceInterface,
1111                         device,
1112                         *m_descriptorPoolNoTransfer,
1113                         *m_descriptorSetLayoutNoTransfer);
1114
1115                 shaderModuleFillImage   = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
1116                 pipelineLayoutFillImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1117                 pipelineFillImage               = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);
1118
1119                 if (m_readType == ShaderReadType::SPARSE)
1120                 {
1121                         shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShaderResidency"), 0);
1122                 }
1123                 else
1124                 {
1125                         shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
1126                 }
1127                 pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1128                 pipelineReadImage               = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleReadImage);
1129         }
1130
1131         // Create pipeline
1132         const Unique<VkShaderModule>    shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
1133         const Unique<VkPipelineLayout>  pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
1134         const Unique<VkPipeline>                pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
1135
1136         // Create command buffer
1137         const Unique<VkCommandPool>             cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
1138         const Unique<VkCommandBuffer>   cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1139
1140         beginCommandBuffer(deviceInterface, *cmdBuffer);
1141
1142         if (!isTexelBuffer)
1143         {
1144                 if (m_useTransfer)
1145                 {
1146                         const vector<VkBufferImageCopy> bufferImageCopy(1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize)));
1147                         copyBufferToImage(deviceInterface,
1148                                                           *cmdBuffer,
1149                                                           *(*m_inputBuffer),
1150                                                           imageSizeInBytes,
1151                                                           bufferImageCopy,
1152                                                           VK_IMAGE_ASPECT_COLOR_BIT,
1153                                                           1,
1154                                                           getNumLayers(m_imageType, m_imageSize), m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
1155                 }
1156                 else
1157                 {
1158                         shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage, *descriptorSetFillImage, imageSizeInBytes, gridSize);
1159                 }
1160                 commandsBeforeCompute(*cmdBuffer);
1161         }
1162
1163         deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1164         deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
1165
1166         deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1167
1168         commandsAfterCompute(*cmdBuffer,
1169                                                  *pipelineReadImage,
1170                                                  *pipelineLayoutReadImage,
1171                                                  *descriptorSetReadImage,
1172                                                  outBuffSizeInBytes,
1173                                                  m_useTransfer);
1174
1175         const VkBufferMemoryBarrier     outputBufferPreHostReadBarrier
1176                 = makeBufferMemoryBarrier(((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
1177                                                                   VK_ACCESS_HOST_READ_BIT,
1178                                                                   m_outputBuffer->get(),
1179                                                                   0ull,
1180                                                                   outBuffSizeInBytes);
1181
1182         deviceInterface.cmdPipelineBarrier(*cmdBuffer,
1183                                                                            ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
1184                                                                            VK_PIPELINE_STAGE_HOST_BIT,
1185                                                                            DE_FALSE, 0u, DE_NULL,
1186                                                                            1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
1187
1188         endCommandBuffer(deviceInterface, *cmdBuffer);
1189
1190         std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
1191         submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
1192                 static_cast<deUint32>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores), de::dataOrNull(waitStages));
1193
1194         Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();
1195
1196         invalidateAlloc(deviceInterface, device, outputBufferAllocation);
1197
1198         if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
1199                 return tcu::TestStatus::pass("Comparison succeeded");
1200         else
1201                 return tcu::TestStatus::fail("Comparison failed");
1202 }
1203
1204 void BinaryAtomicInstanceBase::shaderFillImage (const VkCommandBuffer   cmdBuffer,
1205                                                                                                 const VkBuffer&                 buffer,
1206                                                                                                 const VkPipeline                pipeline,
1207                                                                                                 const VkPipelineLayout  pipelineLayout,
1208                                                                                                 const VkDescriptorSet   descriptorSet,
1209                                                                                                 const VkDeviceSize&             range,
1210                                                                                                 const tcu::UVec3&               gridSize)
1211 {
1212         const VkDevice                                  device                                  = m_context.getDevice();
1213         const DeviceInterface&                  deviceInterface                 = m_context.getDeviceInterface();
1214         const VkDescriptorImageInfo             descResultImageInfo             = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1215         const VkDescriptorBufferInfo    descResultBufferInfo    = makeDescriptorBufferInfo(buffer, 0, range);
1216         const VkImageSubresourceRange   subresourceRange                = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1217
1218         DescriptorSetUpdateBuilder()
1219                 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1220                 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1221                 .update(deviceInterface, device);
1222
1223         const VkImageMemoryBarrier imageBarrierPre = makeImageMemoryBarrier(0,
1224                                                                                                                                                 VK_ACCESS_SHADER_WRITE_BIT,
1225                                                                                                                                                 VK_IMAGE_LAYOUT_UNDEFINED,
1226                                                                                                                                                 VK_IMAGE_LAYOUT_GENERAL,
1227                                                                                                                                                 m_resultImage->get(),
1228                                                                                                                                                 subresourceRange);
1229
1230         deviceInterface.cmdPipelineBarrier(     cmdBuffer,
1231                                                                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1232                                                                                 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1233                                                                                 (VkDependencyFlags)0,
1234                                                                                 0, (const VkMemoryBarrier*)DE_NULL,
1235                                                                                 0, (const VkBufferMemoryBarrier*)DE_NULL,
1236                                                                                 1, &imageBarrierPre);
1237
1238         deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1239         deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1240
1241         deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());
1242
1243         const VkImageMemoryBarrier imageBarrierPost = makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
1244                                                                                                                                                  VK_ACCESS_SHADER_READ_BIT,
1245                                                                                                                                                  VK_IMAGE_LAYOUT_GENERAL,
1246                                                                                                                                                  VK_IMAGE_LAYOUT_GENERAL,
1247                                                                                                                                                  m_resultImage->get(),
1248                                                                                                                                                  subresourceRange);
1249
1250         deviceInterface.cmdPipelineBarrier(     cmdBuffer,
1251                                                                                 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1252                                                                                 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1253                                                                                 (VkDependencyFlags)0,
1254                                                                                 0, (const VkMemoryBarrier*)DE_NULL,
1255                                                                                 0, (const VkBufferMemoryBarrier*)DE_NULL,
1256                                                                                 1, &imageBarrierPost);
1257 }
1258
1259 void BinaryAtomicInstanceBase::createImageAndView       (VkFormat                                               imageFormat,
1260                                                                                                          const tcu::UVec3&                              imageExent,
1261                                                                                                          bool                                                   useTransfer,
1262                                                                                                          de::MovePtr<Image>&                    imagePtr,
1263                                                                                                          Move<VkImageView>&                             imageViewPtr)
1264 {
1265         const VkDevice                  device                  = m_context.getDevice();
1266         const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
1267         Allocator&                              allocator               = m_context.getDefaultAllocator();
1268         const VkImageUsageFlags usageFlags              = getUsageFlags(useTransfer);
1269         VkImageCreateFlags              createFlags             = 0u;
1270
1271         if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
1272                 createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
1273
1274         const auto numLayers = getNumLayers(m_imageType, m_imageSize);
1275
1276         VkImageCreateInfo createInfo =
1277         {
1278                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                                    // VkStructureType                      sType;
1279                 DE_NULL,                                                                                                // const void*                          pNext;
1280                 createFlags,                                                                                    // VkImageCreateFlags           flags;
1281                 mapImageType(m_imageType),                                                              // VkImageType                          imageType;
1282                 imageFormat,                                                                                    // VkFormat                                     format;
1283                 makeExtent3D(imageExent),                                                               // VkExtent3D                           extent;
1284                 1u,                                                                                                             // deUint32                                     mipLevels;
1285                 numLayers,                                                                                              // deUint32                                     arrayLayers;
1286                 VK_SAMPLE_COUNT_1_BIT,                                                                  // VkSampleCountFlagBits        samples;
1287                 m_tiling,                                                                                               // VkImageTiling                        tiling;
1288                 usageFlags,                                                                                             // VkImageUsageFlags            usage;
1289                 VK_SHARING_MODE_EXCLUSIVE,                                                              // VkSharingMode                        sharingMode;
1290                 0u,                                                                                                             // deUint32                                     queueFamilyIndexCount;
1291                 DE_NULL,                                                                                                // const deUint32*                      pQueueFamilyIndices;
1292                 VK_IMAGE_LAYOUT_UNDEFINED,                                                              // VkImageLayout                        initialLayout;
1293         };
1294
1295 #ifndef CTS_USES_VULKANSC
1296         if (m_backingType == ImageBackingType::SPARSE)
1297         {
1298                 const auto&             vki                             = m_context.getInstanceInterface();
1299                 const auto              physicalDevice  = m_context.getPhysicalDevice();
1300                 const auto              sparseQueue             = m_context.getSparseQueue();
1301                 const auto              sparseQueueIdx  = m_context.getSparseQueueFamilyIndex();
1302                 const auto              universalQIdx   = m_context.getUniversalQueueFamilyIndex();
1303                 const deUint32  queueIndices[]  = { universalQIdx, sparseQueueIdx };
1304
1305                 createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
1306
1307                 if (sparseQueueIdx != universalQIdx)
1308                 {
1309                         createInfo.sharingMode                          = VK_SHARING_MODE_CONCURRENT;
1310                         createInfo.queueFamilyIndexCount        = static_cast<deUint32>(DE_LENGTH_OF_ARRAY(queueIndices));
1311                         createInfo.pQueueFamilyIndices          = queueIndices;
1312                 }
1313
1314                 const auto sparseImage = new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
1315                 m_waitSemaphores.push_back(sparseImage->getSemaphore());
1316                 imagePtr = de::MovePtr<Image>(sparseImage);
1317         }
1318         else
1319 #endif // CTS_USES_VULKANSC
1320                 imagePtr = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));
1321
1322         const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);
1323
1324         imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat, subresourceRange);
1325 }
1326
1327 void BinaryAtomicInstanceBase::createImageResources (const VkFormat&    imageFormat,
1328                                                                                                          const bool                     useTransfer)
1329 {
1330         //Create the image that is going to store results of atomic operations
1331         createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage, m_resultImageView);
1332 }
1333
1334 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1335 {
1336 public:
1337
1338                                                 BinaryAtomicEndResultInstance  (Context&                                        context,
1339                                                                                                                 const string&                           name,
1340                                                                                                                 const ImageType                         imageType,
1341                                                                                                                 const tcu::UVec3&                       imageSize,
1342                                                                                                                 const TextureFormat&            format,
1343                                                                                                                 const VkImageTiling                     tiling,
1344                                                                                                                 const AtomicOperation           operation,
1345                                                                                                                 const bool                                      useTransfer,
1346                                                                                                                 const ShaderReadType            shaderReadType,
1347                                                                                                                 const ImageBackingType          backingType)
1348                                                         : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer, shaderReadType, backingType) {}
1349
1350         virtual deUint32        getOutputBufferSize                        (void) const;
1351
1352         virtual void            prepareResources                           (const bool                                  useTransfer) { DE_UNREF(useTransfer); }
1353         virtual void            prepareDescriptors                         (const bool                                  isTexelBuffer);
1354
1355         virtual void            commandsBeforeCompute              (const VkCommandBuffer) const {}
1356         virtual void            commandsAfterCompute               (const VkCommandBuffer               cmdBuffer,
1357                                                                                                                 const VkPipeline                        pipeline,
1358                                                                                                                 const VkPipelineLayout          pipelineLayout,
1359                                                                                                                 const VkDescriptorSet           descriptorSet,
1360                                                                                                                 const VkDeviceSize&                     range,
1361                                                                                                                 const bool                                      useTransfer);
1362
1363         virtual bool            verifyResult                               (Allocation&                                 outputBufferAllocation,
1364                                                                                                                 const bool                                      is64Bit) const;
1365
1366 protected:
1367
1368         template <typename T>
1369         bool                            isValueCorrect                             (const T                                             resultValue,
1370                                                                                                                 deInt32                                         x,
1371                                                                                                                 deInt32                                         y,
1372                                                                                                                 deInt32                                         z,
1373                                                                                                                 const UVec3&                            gridSize,
1374                                                                                                                 const IVec3                                     extendedGridSize) const;
1375 };
1376
1377 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
1378 {
1379         return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1380 }
1381
1382 void BinaryAtomicEndResultInstance::prepareDescriptors (const bool      isTexelBuffer)
1383 {
1384         const VkDescriptorType  descriptorType  = isTexelBuffer ?
1385                                                                                         VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1386                                                                                         VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1387         const VkDevice                  device                  = m_context.getDevice();
1388         const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
1389
1390         m_descriptorSetLayout =
1391                 DescriptorSetLayoutBuilder()
1392                 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1393                 .build(deviceInterface, device);
1394
1395         m_descriptorPool =
1396                 DescriptorPoolBuilder()
1397                 .addType(descriptorType)
1398                 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1399
1400         m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1401
1402         if (isTexelBuffer)
1403         {
1404                 m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1405
1406                 DescriptorSetUpdateBuilder()
1407                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1408                         .update(deviceInterface, device);
1409         }
1410         else
1411         {
1412                 const VkDescriptorImageInfo     descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1413
1414                 DescriptorSetUpdateBuilder()
1415                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1416                         .update(deviceInterface, device);
1417         }
1418 }
1419
1420 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer         cmdBuffer,
1421                                                                                                                   const VkPipeline                      pipeline,
1422                                                                                                                   const VkPipelineLayout        pipelineLayout,
1423                                                                                                                   const VkDescriptorSet         descriptorSet,
1424                                                                                                                   const VkDeviceSize&           range,
1425                                                                                                                   const bool                            useTransfer)
1426 {
1427         const DeviceInterface&                  deviceInterface         = m_context.getDeviceInterface();
1428         const VkImageSubresourceRange   subresourceRange        = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1429         const UVec3                                             layerSize                       = getLayerSize(m_imageType, m_imageSize);
1430
1431         if (m_imageType == IMAGE_TYPE_BUFFER)
1432         {
1433                 m_outputBuffer = m_inputBuffer;
1434         }
1435         else if (useTransfer)
1436         {
1437                 const VkImageMemoryBarrier      resultImagePostDispatchBarrier =
1438                         makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1439                                                                         VK_ACCESS_TRANSFER_READ_BIT,
1440                                                                         VK_IMAGE_LAYOUT_GENERAL,
1441                                                                         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1442                                                                         m_resultImage->get(),
1443                                                                         subresourceRange);
1444
1445                 deviceInterface.cmdPipelineBarrier(     cmdBuffer,
1446                                                                                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1447                                                                                         VK_PIPELINE_STAGE_TRANSFER_BIT,
1448                                                                                         DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1449                                                                                         1u, &resultImagePostDispatchBarrier);
1450
1451                 const VkBufferImageCopy         bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));
1452
1453                 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1454         }
1455         else
1456         {
1457                 const VkDevice                                  device                                  = m_context.getDevice();
1458                 const VkDescriptorImageInfo             descResultImageInfo             = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1459                 const VkDescriptorBufferInfo    descResultBufferInfo    = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1460
1461                 DescriptorSetUpdateBuilder()
1462                         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1463                         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1464                         .update(deviceInterface, device);
1465
1466                 const VkImageMemoryBarrier      resultImagePostDispatchBarrier =
1467                         makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1468                                                                         VK_ACCESS_SHADER_READ_BIT,
1469                                                                         VK_IMAGE_LAYOUT_GENERAL,
1470                                                                         VK_IMAGE_LAYOUT_GENERAL,
1471                                                                         m_resultImage->get(),
1472                                                                         subresourceRange);
1473
1474                 deviceInterface.cmdPipelineBarrier(     cmdBuffer,
1475                                                                                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1476                                                                                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1477                                                                                         DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1478                                                                                         1u, &resultImagePostDispatchBarrier);
1479
1480                 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1481                 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1482
1483                 switch (m_imageType)
1484                 {
1485                         case IMAGE_TYPE_1D_ARRAY:
1486                                 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
1487                                 break;
1488                         case IMAGE_TYPE_2D_ARRAY:
1489                         case IMAGE_TYPE_CUBE:
1490                         case IMAGE_TYPE_CUBE_ARRAY:
1491                                 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1492                                 break;
1493                         default:
1494                                 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
1495                                 break;
1496                 }
1497         }
1498 }
1499
1500 bool BinaryAtomicEndResultInstance::verifyResult (Allocation&   outputBufferAllocation,
1501                                                                                                   const bool    is64Bit) const
1502 {
1503         const UVec3     gridSize                        = getShaderGridSize(m_imageType, m_imageSize);
1504         const IVec3 extendedGridSize    = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1505
1506         tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
1507
1508         for (deInt32 z = 0; z < resultBuffer.getDepth();  z++)
1509         for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1510         for (deInt32 x = 0; x < resultBuffer.getWidth();  x++)
1511         {
1512                 const void* resultValue = resultBuffer.getPixelPtr(x, y, z);
1513                 deInt32 floatToIntValue = 0;
1514                 bool isFloatValue = false;
1515                 if (isFloatFormat(mapTextureFormat(m_format)))
1516                 {
1517                         isFloatValue = true;
1518                         floatToIntValue = static_cast<deInt32>(*((float*)resultValue));
1519                 }
1520
1521                 if (isOrderIndependentAtomicOperation(m_operation))
1522                 {
1523                         if (isUintFormat(mapTextureFormat(m_format)))
1524                         {
1525                                 if(is64Bit)
1526                                 {
1527                                         if (!isValueCorrect<deUint64>(*((deUint64*)resultValue), x, y, z, gridSize, extendedGridSize))
1528                                                 return false;
1529                                 }
1530                                 else
1531                                 {
1532                                         if (!isValueCorrect<deUint32>(*((deUint32*)resultValue), x, y, z, gridSize, extendedGridSize))
1533                                                 return false;
1534                                 }
1535                         }
1536                         else if (isIntFormat(mapTextureFormat(m_format)))
1537                         {
1538                                 if (is64Bit)
1539                                 {
1540                                         if (!isValueCorrect<deInt64>(*((deInt64*)resultValue), x, y, z, gridSize, extendedGridSize))
1541                                                 return false;
1542                                 }
1543                                 else
1544                                 {
1545                                         if (!isValueCorrect<deInt32>(*((deInt32*)resultValue), x, y, z, gridSize, extendedGridSize))
1546                                                 return false;
1547                                 }
1548                         }
1549                         else
1550                         {
1551                                 // 32-bit floating point
1552                                 if (!isValueCorrect<deInt32>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
1553                                         return false;
1554                         }
1555                 }
1556                 else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
1557                 {
1558                         // Check if the end result equals one of the atomic args.
1559                         bool matchFound = false;
1560
1561                         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1562                         {
1563                                 const IVec3 gid(x + i*gridSize.x(), y, z);
1564                                 matchFound = is64Bit ?
1565                                         (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1566                                         isFloatValue ?
1567                                         floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
1568                                         (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1569
1570                         }
1571
1572                         if (!matchFound)
1573                                 return false;
1574                 }
1575                 else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
1576                 {
1577                         // Check if the end result equals one of the atomic args.
1578                         bool matchFound = false;
1579
1580                         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1581                         {
1582                                 const IVec3 gid(x + i*gridSize.x(), y, z);
1583                                 matchFound = is64Bit ?
1584                                         (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1585                                         isFloatValue ?
1586                                         floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
1587                                         (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1588                         }
1589
1590                         if (!matchFound)
1591                                 return false;
1592                 }
1593                 else
1594                         DE_ASSERT(false);
1595         }
1596         return true;
1597 }
1598
1599 template <typename T>
1600 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1601 {
1602         T reference = getOperationInitialValue<T>(m_operation);
1603         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1604         {
1605                 const IVec3 gid(x + i*gridSize.x(), y, z);
1606                 T                       arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1607                 reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1608         }
1609         return (resultValue == reference);
1610 }
1611
1612 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
1613 {
1614         return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation, m_useTransfer, m_readType, m_backingType);
1615 }
1616
1617 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
1618 {
1619 public:
1620
1621                                                 BinaryAtomicIntermValuesInstance   (Context&                            context,
1622                                                                                                                         const string&                   name,
1623                                                                                                                         const ImageType                 imageType,
1624                                                                                                                         const tcu::UVec3&               imageSize,
1625                                                                                                                         const TextureFormat&    format,
1626                                                                                                                         const VkImageTiling             tiling,
1627                                                                                                                         const AtomicOperation   operation,
1628                                                                                                                         const bool                              useTransfer,
1629                                                                                                                         const ShaderReadType    shaderReadType,
1630                                                                                                                         const ImageBackingType  backingType)
1631                                                         : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer, shaderReadType, backingType) {}
1632
1633         virtual deUint32        getOutputBufferSize                                (void) const;
1634
1635         virtual void            prepareResources                                   (const bool                          useTransfer);
1636         virtual void            prepareDescriptors                                 (const bool                          isTexelBuffer);
1637
1638         virtual void            commandsBeforeCompute                      (const VkCommandBuffer       cmdBuffer) const;
1639         virtual void            commandsAfterCompute                       (const VkCommandBuffer       cmdBuffer,
1640                                                                                                                         const VkPipeline                pipeline,
1641                                                                                                                         const VkPipelineLayout  pipelineLayout,
1642                                                                                                                         const VkDescriptorSet   descriptorSet,
1643                                                                                                                         const VkDeviceSize&             range,
1644                                                                                                                         const bool                              useTransfer);
1645
1646         virtual bool            verifyResult                                       (Allocation&                         outputBufferAllocation,
1647                                                                                                                         const bool                              is64Bit) const;
1648
1649 protected:
1650
1651         template <typename T>
1652         bool                            areValuesCorrect                                   (tcu::ConstPixelBufferAccess& resultBuffer,
1653                                                                                                                         const bool isFloatingPoint,
1654                                                                                                                         deInt32 x,
1655                                                                                                                         deInt32 y,
1656                                                                                                                         deInt32 z,
1657                                                                                                                         const UVec3& gridSize,
1658                                                                                                                         const IVec3 extendedGridSize) const;
1659
1660         template <typename T>
1661         bool                            verifyRecursive                                    (const deInt32                       index,
1662                                                                                                                         const T                                 valueSoFar,
1663                                                                                                                         bool                                    argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1664                                                                                                                         const T                                 atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1665                                                                                                                         const T                                 resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
1666         de::MovePtr<Image>      m_intermResultsImage;
1667         Move<VkImageView>       m_intermResultsImageView;
1668 };
1669
1670 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
1671 {
1672         return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1673 }
1674
1675 void BinaryAtomicIntermValuesInstance::prepareResources (const bool useTransfer)
1676 {
1677         const UVec3 layerSize                   = getLayerSize(m_imageType, m_imageSize);
1678         const bool  isCubeBasedImage    = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
1679         const UVec3 extendedLayerSize   = isCubeBasedImage      ? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
1680                                                                                                                 : UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1681
1682         createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage, m_intermResultsImageView);
1683 }
1684
1685 void BinaryAtomicIntermValuesInstance::prepareDescriptors (const bool   isTexelBuffer)
1686 {
1687         const VkDescriptorType  descriptorType  = isTexelBuffer ?
1688                                                                                         VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1689                                                                                         VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1690
1691         const VkDevice                  device                  = m_context.getDevice();
1692         const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
1693
1694         m_descriptorSetLayout =
1695                 DescriptorSetLayoutBuilder()
1696                 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1697                 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1698                 .build(deviceInterface, device);
1699
1700         m_descriptorPool =
1701                 DescriptorPoolBuilder()
1702                 .addType(descriptorType, 2u)
1703                 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1704
1705         m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1706
1707         if (isTexelBuffer)
1708         {
1709                 m_descResultBufferView                  = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1710                 m_descIntermResultsBufferView   = makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1711
1712                 DescriptorSetUpdateBuilder()
1713                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1714                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &(m_descIntermResultsBufferView.get()))
1715                         .update(deviceInterface, device);
1716         }
1717         else
1718         {
1719                 const VkDescriptorImageInfo     descResultImageInfo                     = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1720                 const VkDescriptorImageInfo     descIntermResultsImageInfo      = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1721
1722                 DescriptorSetUpdateBuilder()
1723                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1724                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &descIntermResultsImageInfo)
1725                         .update(deviceInterface, device);
1726         }
1727 }
1728
1729 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
1730 {
1731         const DeviceInterface&                  deviceInterface         = m_context.getDeviceInterface();
1732         const VkImageSubresourceRange   subresourceRange        = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1733
1734         const VkImageMemoryBarrier      imagePreDispatchBarrier =
1735                 makeImageMemoryBarrier( 0u,
1736                                                                 VK_ACCESS_SHADER_WRITE_BIT,
1737                                                                 VK_IMAGE_LAYOUT_UNDEFINED,
1738                                                                 VK_IMAGE_LAYOUT_GENERAL,
1739                                                                 m_intermResultsImage->get(),
1740                                                                 subresourceRange);
1741
1742         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
1743 }
1744
1745 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer              cmdBuffer,
1746                                                                                                                          const VkPipeline                       pipeline,
1747                                                                                                                          const VkPipelineLayout         pipelineLayout,
1748                                                                                                                          const VkDescriptorSet          descriptorSet,
1749                                                                                                                          const VkDeviceSize&            range,
1750                                                                                                                          const bool                                     useTransfer)
1751 {
1752         // nothing is needed for texel image buffer
1753         if (m_imageType == IMAGE_TYPE_BUFFER)
1754                 return;
1755
1756         const DeviceInterface&                  deviceInterface         = m_context.getDeviceInterface();
1757         const VkImageSubresourceRange   subresourceRange        = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1758         const UVec3                                             layerSize                       = getLayerSize(m_imageType, m_imageSize);
1759
1760         if (useTransfer)
1761         {
1762                 const VkImageMemoryBarrier      imagePostDispatchBarrier =
1763                         makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1764                                                                         VK_ACCESS_TRANSFER_READ_BIT,
1765                                                                         VK_IMAGE_LAYOUT_GENERAL,
1766                                                                         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1767                                                                         m_intermResultsImage->get(),
1768                                                                         subresourceRange);
1769
1770                 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
1771
1772                 const UVec3                                     extendedLayerSize               = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1773                 const VkBufferImageCopy         bufferImageCopyParams   = makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
1774
1775                 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1776         }
1777         else
1778         {
1779                 const VkDevice                                  device                                  = m_context.getDevice();
1780                 const VkDescriptorImageInfo             descResultImageInfo             = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1781                 const VkDescriptorBufferInfo    descResultBufferInfo    = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1782
1783                 DescriptorSetUpdateBuilder()
1784                         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1785                         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1786                         .update(deviceInterface, device);
1787
1788                 const VkImageMemoryBarrier      resultImagePostDispatchBarrier =
1789                 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1790                                                                 VK_ACCESS_SHADER_READ_BIT,
1791                                                                 VK_IMAGE_LAYOUT_GENERAL,
1792                                                                 VK_IMAGE_LAYOUT_GENERAL,
1793                                                                 m_intermResultsImage->get(),
1794                                                                 subresourceRange);
1795
1796                 deviceInterface.cmdPipelineBarrier(     cmdBuffer,
1797                                                                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1798                                                                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1799                                                                         DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1800                                                                         1u, &resultImagePostDispatchBarrier);
1801
1802                 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1803                 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1804
1805                 switch (m_imageType)
1806                 {
1807                         case IMAGE_TYPE_1D_ARRAY:
1808                                 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), subresourceRange.layerCount, layerSize.z());
1809                                 break;
1810                         case IMAGE_TYPE_2D_ARRAY:
1811                         case IMAGE_TYPE_CUBE:
1812                         case IMAGE_TYPE_CUBE_ARRAY:
1813                                 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1814                                 break;
1815                         default:
1816                                 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1817                                 break;
1818                 }
1819         }
1820 }
1821
1822 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation&        outputBufferAllocation,
1823                                                                                                          const bool             is64Bit) const
1824 {
1825         const UVec3     gridSize                 = getShaderGridSize(m_imageType, m_imageSize);
1826         const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1827
1828         tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
1829
1830         for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1831         for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1832         for (deUint32 x = 0; x < gridSize.x(); x++)
1833         {
1834                 if (isUintFormat(mapTextureFormat(m_format)))
1835                 {
1836                         if (is64Bit)
1837                         {
1838                                 if (!areValuesCorrect<deUint64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1839                                         return false;
1840                         }
1841                         else
1842                         {
1843                                 if (!areValuesCorrect<deUint32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1844                                         return false;
1845                         }
1846                 }
1847                 else if (isIntFormat(mapTextureFormat(m_format)))
1848                 {
1849                         if (is64Bit)
1850                         {
1851                                 if (!areValuesCorrect<deInt64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1852                                         return false;
1853                         }
1854                         else
1855                         {
1856                                 if (!areValuesCorrect<deInt32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1857                                         return false;
1858                         }
1859                 }
1860                 else
1861                 {
1862                         // 32-bit floating point
1863                         if (!areValuesCorrect<deInt32>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
1864                                 return false;
1865                 }
1866         }
1867
1868         return true;
1869 }
1870
1871 template <typename T>
1872 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess& resultBuffer, const bool isFloatingPoint, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1873 {
1874         T               resultValues[NUM_INVOCATIONS_PER_PIXEL];
1875         T               atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1876         bool    argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1877
1878         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1879         {
1880                 IVec3 gid(x + i*gridSize.x(), y, z);
1881                 T data = *((T*)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
1882                 if (isFloatingPoint)
1883                 {
1884                         float fData;
1885                         deMemcpy(&fData, &data, sizeof(fData));
1886                         data = static_cast<T>(fData);
1887                 }
1888                 resultValues[i] = data;
1889                 atomicArgs[i]   = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1890                 argsUsed[i]             = false;
1891         }
1892
1893         // Verify that the return values form a valid sequence.
1894         return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
1895 }
1896
1897 template <typename T>
1898 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32   index,
1899                                                                                                                 const T                 valueSoFar,
1900                                                                                                                 bool                    argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1901                                                                                                                 const T                 atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1902                                                                                                                 const T                 resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
1903 {
1904         if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
1905                 return true;
1906
1907         for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1908         {
1909                 if (!argsUsed[i] && resultValues[i] == valueSoFar)
1910                 {
1911                         argsUsed[i] = true;
1912
1913                         if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
1914                         {
1915                                 return true;
1916                         }
1917
1918                         argsUsed[i] = false;
1919                 }
1920         }
1921
1922         return false;
1923 }
1924
1925 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
1926 {
1927         return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation, m_useTransfer, m_readType, m_backingType);
1928 }
1929
1930 } // anonymous ns
1931
1932 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
1933 {
1934         de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
1935
1936         struct ImageParams
1937         {
1938                 ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
1939                         : m_imageType   (imageType)
1940                         , m_imageSize   (imageSize)
1941                 {
1942                 }
1943                 const ImageType         m_imageType;
1944                 const tcu::UVec3        m_imageSize;
1945         };
1946
1947         const ImageParams imageParamsArray[] =
1948         {
1949                 ImageParams(IMAGE_TYPE_1D,                      tcu::UVec3(64u, 1u, 1u)),
1950                 ImageParams(IMAGE_TYPE_1D_ARRAY,        tcu::UVec3(64u, 1u, 8u)),
1951                 ImageParams(IMAGE_TYPE_2D,                      tcu::UVec3(64u, 64u, 1u)),
1952                 ImageParams(IMAGE_TYPE_2D_ARRAY,        tcu::UVec3(64u, 64u, 8u)),
1953                 ImageParams(IMAGE_TYPE_3D,                      tcu::UVec3(48u, 48u, 8u)),
1954                 ImageParams(IMAGE_TYPE_CUBE,            tcu::UVec3(64u, 64u, 1u)),
1955                 ImageParams(IMAGE_TYPE_CUBE_ARRAY,      tcu::UVec3(64u, 64u, 2u)),
1956                 ImageParams(IMAGE_TYPE_BUFFER,          tcu::UVec3(64u, 1u, 1u))
1957         };
1958
1959         const tcu::TextureFormat formats[] =
1960         {
1961                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1962                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1963                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1964                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1965                 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)
1966         };
1967
1968     static const VkImageTiling s_tilings[] = {
1969         VK_IMAGE_TILING_OPTIMAL,
1970         VK_IMAGE_TILING_LINEAR,
1971     };
1972
1973         const struct
1974         {
1975                 ShaderReadType          type;
1976                 const char*                     name;
1977         } readTypes[] =
1978         {
1979                 {       ShaderReadType::NORMAL, "normal_read"   },
1980 #ifndef CTS_USES_VULKANSC
1981                 {       ShaderReadType::SPARSE, "sparse_read"   },
1982 #endif // CTS_USES_VULKANSC
1983         };
1984
1985         const struct
1986         {
1987                 ImageBackingType        type;
1988                 const char*                     name;
1989         } backingTypes[] =
1990         {
1991                 {       ImageBackingType::NORMAL,       "normal_img"    },
1992 #ifndef CTS_USES_VULKANSC
1993                 {       ImageBackingType::SPARSE,       "sparse_img"    },
1994 #endif // CTS_USES_VULKANSC
1995         };
1996
1997         for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
1998         {
1999                 const AtomicOperation operation = (AtomicOperation)operationI;
2000
2001                 de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
2002
2003                 for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
2004                 {
2005                         const ImageType  imageType = imageParamsArray[imageTypeNdx].m_imageType;
2006                         const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
2007
2008                         de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
2009
2010                         for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
2011                         {
2012                                 const bool                              useTransfer     = (useTransferIdx > 0);
2013                                 const string                    groupName       = (!useTransfer ? "no" : "") + string("transfer");
2014
2015                                 de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str(), ""));
2016
2017                                 for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
2018                                 {
2019                                         const auto& readType = readTypes[readTypeIdx];
2020
2021                                         de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name, ""));
2022
2023                                         for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
2024                                         {
2025                                                 const auto& backingType = backingTypes[backingTypeIdx];
2026
2027                                                 de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(new tcu::TestCaseGroup(testCtx, backingType.name, ""));
2028
2029                                                 for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
2030                                                 {
2031                                                         for (int tilingNdx = 0; tilingNdx < DE_LENGTH_OF_ARRAY(s_tilings); tilingNdx++)
2032                                                         {
2033                                                                 const TextureFormat&    format          = formats[formatNdx];
2034                                                                 const std::string               formatName      = getShaderImageFormatQualifier(format);
2035                                                                 const char* suffix = (s_tilings[tilingNdx] == VK_IMAGE_TILING_OPTIMAL) ? "" : "_linear";
2036
2037                                                                 // Need SPIRV programs in vktImageAtomicSpirvShaders.cpp
2038                                                                 if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
2039                                                                 {
2040                                                                         continue;
2041                                                                 }
2042
2043                                                                 // Only 2D and 3D images may support sparse residency.
2044                                                                 // VK_IMAGE_TILING_LINEAR does not support sparse residency
2045                                                                 const auto vkImageType = mapImageType(imageType);
2046                                                                 if (backingType.type == ImageBackingType::SPARSE && ((vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D) || (s_tilings[tilingNdx] == VK_IMAGE_TILING_LINEAR)))
2047                                                                         continue;
2048
2049                                                                 // Only some operations are supported on floating-point
2050                                                                 if (format.type == tcu::TextureFormat::FLOAT)
2051                                                                 {
2052                                                                         if (operation != ATOMIC_OPERATION_ADD &&
2053 #ifndef CTS_USES_VULKANSC
2054                                                                                 operation != ATOMIC_OPERATION_MIN &&
2055                                                                                 operation != ATOMIC_OPERATION_MAX &&
2056 #endif // CTS_USES_VULKANSC
2057                                                                                 operation != ATOMIC_OPERATION_EXCHANGE)
2058                                                                         {
2059                                                                                 continue;
2060                                                                         }
2061                                                                 }
2062
2063                                                                 if (readType.type == ShaderReadType::SPARSE)
2064                                                                 {
2065                                                                         // When using transfer, shader reads will not be used, so avoid creating two identical cases.
2066                                                                         if (useTransfer)
2067                                                                                 continue;
2068
2069                                                                         // Sparse reads are not supported for all types of images.
2070                                                                         if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY || imageType == IMAGE_TYPE_BUFFER)
2071                                                                                 continue;
2072                                                                 }
2073
2074                                                                 //!< Atomic case checks the end result of the operations, and not the intermediate return values
2075                                                                 const string caseEndResult = formatName + "_end_result" + suffix;
2076                                                                 backingTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, s_tilings[tilingNdx], operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2077
2078                                                                 //!< Atomic case checks the return values of the atomic function and not the end result.
2079                                                                 const string caseIntermValues = formatName + "_intermediate_values" + suffix;
2080                                                                 backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, s_tilings[tilingNdx], operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2081                                                         }
2082                                                 }
2083
2084                                                 readTypeGroup->addChild(backingTypeGroup.release());
2085                                         }
2086
2087                                         transferGroup->addChild(readTypeGroup.release());
2088                                 }
2089
2090                                 imageTypeGroup->addChild(transferGroup.release());
2091                         }
2092
2093                         operationGroup->addChild(imageTypeGroup.release());
2094                 }
2095
2096                 imageAtomicOperationsTests->addChild(operationGroup.release());
2097         }
2098
2099         return imageAtomicOperationsTests.release();
2100 }
2101
2102 } // image
2103 } // vkt