VK-GL-CTS: external/vulkancts/modules/vulkan/shaderexecutor/vktAtomicOperationTests.cpp
/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2017 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Atomic operations (OpAtomic*) tests.
 *//*--------------------------------------------------------------------*/

#include "vktAtomicOperationTests.hpp"
#include "vktShaderExecutor.hpp"

#include "vkRefUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vktTestGroupUtil.hpp"

#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuResultCollector.hpp"

#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deRandom.hpp"
#include "deArrayUtil.hpp"

#include <string>
#include <memory>
#include <cmath>

namespace vkt
{
namespace shaderexecutor
{

namespace
{

using de::UniquePtr;
using de::MovePtr;
using std::vector;

using namespace vk;

enum class AtomicMemoryType
{
	BUFFER = 0,	// Normal buffer.
	SHARED,		// Shared global struct in a compute workgroup.
	REFERENCE,	// Buffer passed as a reference.
};

// Helper class to indicate the shader type and which memory the atomic operations work on.
class AtomicShaderType
{
public:
	AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
		: m_type				(type)
		, m_atomicMemoryType	(memoryType)
	{
		// Shared memory can only be used with compute shaders.
		DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE);
	}

	glu::ShaderType		getType			(void) const	{ return m_type; }
	AtomicMemoryType	getMemoryType	(void) const	{ return m_atomicMemoryType; }

private:
	glu::ShaderType		m_type;
	AtomicMemoryType	m_atomicMemoryType;
};

// Buffer helper
class Buffer
{
public:
				Buffer		(Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);

	VkBuffer	getBuffer	(void) const { return *m_buffer;					}
	void*		getHostPtr	(void) const { return m_allocation->getHostPtr();	}
	void		flush		(void);
	void		invalidate	(void);

private:
	const DeviceInterface&		m_vkd;
	const VkDevice				m_device;
	const VkQueue				m_queue;
	const deUint32				m_queueIndex;
	const Unique<VkBuffer>		m_buffer;
	const UniquePtr<Allocation>	m_allocation;
};

typedef de::SharedPtr<Buffer> BufferSp;

Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
{
	const VkBufferCreateInfo createInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
		DE_NULL,
		(VkBufferCreateFlags)0,
		size,
		usageFlags,
		VK_SHARING_MODE_EXCLUSIVE,
		0u,
		DE_NULL
	};
	return createBuffer(vkd, device, &createInfo);
}

MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
{
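	// Buffers that will be accessed through a buffer reference need memory allocated with the device-address flag,
	// so request MemoryRequirement::DeviceAddress in addition to host visibility in that case.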
	const MemoryRequirement	allocationType	= (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
	MovePtr<Allocation>		alloc			(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));

	VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));

	return alloc;
}

Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
	: m_vkd			(context.getDeviceInterface())
	, m_device		(context.getDevice())
	, m_queue		(context.getUniversalQueue())
	, m_queueIndex	(context.getUniversalQueueFamilyIndex())
	, m_buffer		(createBuffer			(context.getDeviceInterface(),
											 context.getDevice(),
											 (VkDeviceSize)size,
											 usage))
	, m_allocation	(allocateAndBindMemory	(context.getDeviceInterface(),
											 context.getDevice(),
											 context.getDefaultAllocator(),
											 *m_buffer,
											 useRef))
{
}

void Buffer::flush (void)
{
	flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

void Buffer::invalidate (void)
{
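	// Make device writes visible to the host: submit a one-shot command buffer with a memory-write to host-read
	// barrier before invalidating the mapped range below.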
	const auto	cmdPool			= vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
	const auto	cmdBufferPtr	= vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
	const auto	cmdBuffer		= cmdBufferPtr.get();
	const auto	bufferBarrier	= vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);

	beginCommandBuffer(m_vkd, cmdBuffer);
	m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
	endCommandBuffer(m_vkd, cmdBuffer);
	submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);

	invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

// Tests

enum AtomicOperation
{
	ATOMIC_OP_EXCHANGE = 0,
	ATOMIC_OP_COMP_SWAP,
	ATOMIC_OP_ADD,
	ATOMIC_OP_MIN,
	ATOMIC_OP_MAX,
	ATOMIC_OP_AND,
	ATOMIC_OP_OR,
	ATOMIC_OP_XOR,

	ATOMIC_OP_LAST
};

std::string atomicOp2Str (AtomicOperation op)
{
	static const char* const s_names[] =
	{
		"atomicExchange",
		"atomicCompSwap",
		"atomicAdd",
		"atomicMin",
		"atomicMax",
		"atomicAnd",
		"atomicOr",
		"atomicXor"
	};
	return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
}

enum
{
	NUM_ELEMENTS = 32
};

enum DataType
{
	DATA_TYPE_INT32 = 0,
	DATA_TYPE_UINT32,
	DATA_TYPE_FLOAT32,
	DATA_TYPE_INT64,
	DATA_TYPE_UINT64,
	DATA_TYPE_FLOAT64,

	DATA_TYPE_LAST
};

std::string dataType2Str (DataType type)
{
	static const char* const s_names[] =
	{
		"int",
		"uint",
		"float",
		"int64_t",
		"uint64_t",
		"double",
	};
	return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
}

class BufferInterface
{
public:
	virtual void setBuffer(void* ptr) = 0;

	virtual size_t bufferSize() = 0;

	virtual void fillWithTestData(de::Random& rnd) = 0;

	virtual void checkResults(tcu::ResultCollector& resultCollector) = 0;

	virtual ~BufferInterface() {}
};

template<typename dataTypeT>
class TestBuffer : public BufferInterface
{
public:

	TestBuffer(AtomicOperation atomicOp)
		: m_atomicOp(atomicOp)
	{}

	template<typename T>
	struct BufferData
	{
		// Use half the number of elements for inout to cause overlap between atomic operations.
		// Each inout element at index i will have two atomic operations using input from
		// indices i and i + NUM_ELEMENTS / 2.
		T		inout[NUM_ELEMENTS / 2];
		T		input[NUM_ELEMENTS];
		T		compare[NUM_ELEMENTS];
		T		output[NUM_ELEMENTS];
		T		invocationHitCount[NUM_ELEMENTS];
		deInt32	index;
	};

	virtual void setBuffer(void* ptr)
	{
		m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
	}

	virtual size_t bufferSize()
	{
		return sizeof(BufferData<dataTypeT>);
	}

	virtual void fillWithTestData(de::Random& rnd)
	{
		dataTypeT pattern;
		deMemset(&pattern, 0xcd, sizeof(dataTypeT));

		for (int i = 0; i < NUM_ELEMENTS / 2; i++)
		{
			m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
			// The first half of compare elements match with every even index.
			// The second half matches with odd indices. This causes the
			// overlapping operations to only select one.
			m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
			m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
		}
		for (int i = 0; i < NUM_ELEMENTS; i++)
		{
			m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
			m_ptr->output[i] = pattern;
			m_ptr->invocationHitCount[i] = 0;
		}
		m_ptr->index = 0;

		// Take a copy to be used when calculating expected values.
		m_original = *m_ptr;
	}

	virtual void checkResults(tcu::ResultCollector& resultCollector)
	{
		checkOperation(m_original, *m_ptr, resultCollector);
	}

	template<typename T>
	struct Expected
	{
		T m_inout;
		T m_output[2];

		Expected (T inout, T output0, T output1)
			: m_inout(inout)
		{
			m_output[0] = output0;
			m_output[1] = output1;
		}

		bool compare (T inout, T output0, T output1)
		{
			return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
					&& deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
					&& deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
		}
	};

	void checkOperation	(const BufferData<dataTypeT>&	original,
						 const BufferData<dataTypeT>&	result,
						 tcu::ResultCollector&			resultCollector);

	const AtomicOperation	m_atomicOp;

	BufferData<dataTypeT>*	m_ptr;
	BufferData<dataTypeT>	m_original;

};

template<typename dataTypeT>
class TestBufferFloatingPoint : public BufferInterface
{
public:

	TestBufferFloatingPoint(AtomicOperation atomicOp)
		: m_atomicOp(atomicOp)
	{}

	template<typename T>
	struct BufferDataFloatingPoint
	{
		// Use half the number of elements for inout to cause overlap between atomic operations.
		// Each inout element at index i will have two atomic operations using input from
		// indices i and i + NUM_ELEMENTS / 2.
		T		inout[NUM_ELEMENTS / 2];
		T		input[NUM_ELEMENTS];
		T		compare[NUM_ELEMENTS];
		T		output[NUM_ELEMENTS];
		T		invocationHitCount[NUM_ELEMENTS];
		deInt32	index;
	};

	virtual void setBuffer(void* ptr)
	{
		m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
	}

	virtual size_t bufferSize()
	{
		return sizeof(BufferDataFloatingPoint<dataTypeT>);
	}

	virtual void fillWithTestData(de::Random& rnd)
	{
		dataTypeT pattern;
		deMemset(&pattern, 0xcd, sizeof(dataTypeT));

		for (int i = 0; i < NUM_ELEMENTS / 2; i++)
		{
			m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getFloat());
			// The first half of compare elements match with every even index.
			// The second half matches with odd indices. This causes the
			// overlapping operations to only select one.
			m_ptr->compare[i] = m_ptr->inout[i] + (dataTypeT)(i % 2);
			m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + (dataTypeT)(1 - (i % 2));
		}
		for (int i = 0; i < NUM_ELEMENTS; i++)
		{
			m_ptr->input[i] = static_cast<dataTypeT>(rnd.getFloat());
			m_ptr->output[i] = pattern;
			m_ptr->invocationHitCount[i] = 0;
		}
		m_ptr->index = 0;

		// Take a copy to be used when calculating expected values.
		m_original = *m_ptr;
	}

	virtual void checkResults(tcu::ResultCollector& resultCollector)
	{
		checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
	}

	template<typename T>
	struct Expected
	{
		T m_inout;
		T m_output[2];

		Expected(T inout, T output0, T output1)
			: m_inout(inout)
		{
			m_output[0] = output0;
			m_output[1] = output1;
		}

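		// Floating-point addition is not associative, so the host-computed expected values can differ slightly
		// from the device results depending on operation order; compare with a small tolerance instead of the
		// bit-exact memcmp used for integers.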
		bool compare(T inout, T output0, T output1)
		{
			T diff1 = static_cast<T>(fabs(m_inout - inout));
			T diff2 = static_cast<T>(fabs(m_output[0] - output0));
			T diff3 = static_cast<T>(fabs(m_output[1] - output1));
			const T epsilon = static_cast<T>(0.00001);
			return (diff1 < epsilon) && (diff2 < epsilon) && (diff3 < epsilon);
		}
	};

	void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT>&	original,
									 const BufferDataFloatingPoint<dataTypeT>&	result,
									 tcu::ResultCollector&						resultCollector);

	const AtomicOperation	m_atomicOp;

	BufferDataFloatingPoint<dataTypeT>*	m_ptr;
	BufferDataFloatingPoint<dataTypeT>	m_original;

};

static BufferInterface* createTestBuffer(DataType type, AtomicOperation atomicOp)
{
	switch (type)
	{
	case DATA_TYPE_INT32:
		return new TestBuffer<deInt32>(atomicOp);
	case DATA_TYPE_UINT32:
		return new TestBuffer<deUint32>(atomicOp);
	case DATA_TYPE_FLOAT32:
		return new TestBufferFloatingPoint<float>(atomicOp);
	case DATA_TYPE_INT64:
		return new TestBuffer<deInt64>(atomicOp);
	case DATA_TYPE_UINT64:
		return new TestBuffer<deUint64>(atomicOp);
	case DATA_TYPE_FLOAT64:
		return new TestBufferFloatingPoint<double>(atomicOp);
	default:
		DE_ASSERT(false);
		return DE_NULL;
	}
}

// Use template to handle both signed and unsigned cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBuffer<T>::checkOperation (const BufferData<T>&	original,
									const BufferData<T>&	result,
									tcu::ResultCollector&	resultCollector)
{
	// originalInout = original inout
	// input0 = input at index i
	// input1 = input at index i + NUM_ELEMENTS / 2
	//
	// The atomic operation returns the memory contents before the
	// operation, and this is stored as output. Two operations are
	// executed for each InOut value (using input0 and input1).
	//
	// Since two operations overlap on each InOut element, the resulting
	// InOut value and the outputs of the operations have two result
	// candidates depending on the execution order. Verification passes
	// if the results match one of these options.
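	//
	// For example, with atomicAdd, originalInout = 1, input0 = 2 and input1 = 3:
	// - order A: output0 = 1, inout becomes 3, then output1 = 3, inout becomes 6 -> (6, 1, 3);
	// - order B: output1 = 1, inout becomes 4, then output0 = 4, inout becomes 6 -> (6, 4, 1).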

	for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
	{
		// Needed when reinterpreting the data as signed values.
		const T originalInout	= *reinterpret_cast<const T*>(&original.inout[elementNdx]);
		const T input0			= *reinterpret_cast<const T*>(&original.input[elementNdx]);
		const T input1			= *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

		// Expected results are collected to this vector.
		vector<Expected<T> > exp;

		switch (m_atomicOp)
		{
			case ATOMIC_OP_ADD:
			{
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
			}
			break;

			case ATOMIC_OP_AND:
			{
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
			}
			break;

			case ATOMIC_OP_OR:
			{
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
			}
			break;

			case ATOMIC_OP_XOR:
			{
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
			}
			break;

			case ATOMIC_OP_MIN:
			{
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_MAX:
			{
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_EXCHANGE:
			{
				exp.push_back(Expected<T>(input1, originalInout, input0));
				exp.push_back(Expected<T>(input0, input1, originalInout));
			}
			break;

			case ATOMIC_OP_COMP_SWAP:
			{
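				// The compare values were set up in fillWithTestData so that exactly one of the two swaps can
				// succeed: for even indices compare[i] matches the original inout value (input0 wins), for odd
				// indices compare[i + NUM_ELEMENTS / 2] matches it (input1 wins). The losing swap observes either
				// the original value (if it ran first) or the winner's result.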
				if (elementNdx % 2 == 0)
				{
					exp.push_back(Expected<T>(input0, originalInout, input0));
					exp.push_back(Expected<T>(input0, originalInout, originalInout));
				}
				else
				{
					exp.push_back(Expected<T>(input1, input1, originalInout));
					exp.push_back(Expected<T>(input1, originalInout, originalInout));
				}
			}
			break;

			default:
				DE_FATAL("Unexpected atomic operation.");
				break;
		}

		const T resIo		= result.inout[elementNdx];
		const T resOutput0	= result.output[elementNdx];
		const T resOutput1	= result.output[elementNdx + NUM_ELEMENTS / 2];

		if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
		{
			std::ostringstream errorMessage;
			errorMessage	<< "ERROR: Result value check failed at index " << elementNdx
							<< ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
							<< ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
							<< tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
							<< " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";

			resultCollector.fail(errorMessage.str());
		}
	}
}

// Use template to handle both float and double cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBufferFloatingPoint<T>::checkOperationFloatingPoint(const BufferDataFloatingPoint<T>&	original,
															 const BufferDataFloatingPoint<T>&	result,
															 tcu::ResultCollector&				resultCollector)
{
	// originalInout = original inout
	// input0 = input at index i
	// input1 = input at index i + NUM_ELEMENTS / 2
	//
	// The atomic operation returns the memory contents before the
	// operation, and this is stored as output. Two operations are
	// executed for each InOut value (using input0 and input1).
	//
	// Since two operations overlap on each InOut element, the resulting
	// InOut value and the outputs of the operations have two result
	// candidates depending on the execution order. Verification passes
	// if the results match one of these options.

	for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
	{
		const T originalInout	= *reinterpret_cast<const T*>(&original.inout[elementNdx]);
		const T input0			= *reinterpret_cast<const T*>(&original.input[elementNdx]);
		const T input1			= *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

		// Expected results are collected to this vector.
		vector<Expected<T> > exp;

		switch (m_atomicOp)
		{
		case ATOMIC_OP_ADD:
		{
			exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
			exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
		}
		break;

		case ATOMIC_OP_EXCHANGE:
		{
			exp.push_back(Expected<T>(input1, originalInout, input0));
			exp.push_back(Expected<T>(input0, input1, originalInout));
		}
		break;

		default:
			DE_FATAL("Unexpected atomic operation.");
			break;
		}

		const T resIo		= result.inout[elementNdx];
		const T resOutput0	= result.output[elementNdx];
		const T resOutput1	= result.output[elementNdx + NUM_ELEMENTS / 2];

		if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
		{
			std::ostringstream errorMessage;
			errorMessage << "ERROR: Result value check failed at index " << elementNdx
				<< ". Expected one of the two outcomes: InOut = " << exp[0].m_inout
				<< ", Output0 = " << exp[0].m_output[0] << ", Output1 = "
				<< exp[0].m_output[1] << ", or InOut = " << exp[1].m_inout
				<< ", Output0 = " << exp[1].m_output[0] << ", Output1 = "
				<< exp[1].m_output[1] << ". Got: InOut = " << resIo
				<< ", Output0 = " << resOutput0 << ", Output1 = "
				<< resOutput1 << ". Using Input0 = " << original.input[elementNdx]
				<< " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";

			resultCollector.fail(errorMessage.str());
		}
	}
}

class AtomicOperationCaseInstance : public TestInstance
{
public:
							AtomicOperationCaseInstance	(Context&			context,
														 const ShaderSpec&	shaderSpec,
														 AtomicShaderType	shaderType,
														 DataType			dataType,
														 AtomicOperation	atomicOp);

	virtual tcu::TestStatus	iterate						(void);

private:
	const ShaderSpec&		m_shaderSpec;
	AtomicShaderType		m_shaderType;
	const DataType			m_dataType;
	AtomicOperation			m_atomicOp;

};

AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context&			context,
														  const ShaderSpec&	shaderSpec,
														  AtomicShaderType	shaderType,
														  DataType			dataType,
														  AtomicOperation	atomicOp)
	: TestInstance	(context)
	, m_shaderSpec	(shaderSpec)
	, m_shaderType	(shaderType)
	, m_dataType	(dataType)
	, m_atomicOp	(atomicOp)
{
}

tcu::TestStatus AtomicOperationCaseInstance::iterate (void)
{
	de::UniquePtr<BufferInterface>	testBuffer	(createTestBuffer(m_dataType, m_atomicOp));
	tcu::TestLog&					log			= m_context.getTestContext().getLog();
	const DeviceInterface&			vkd			= m_context.getDeviceInterface();
	const VkDevice					device		= m_context.getDevice();
	de::Random						rnd			(0x62a15e34);
	const bool						useRef		= (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
	const VkDescriptorType			descType	= (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	const VkBufferUsageFlags		usageFlags	= (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));

	// The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
	// a uniform buffer. If not, it will be passed directly as a descriptor.
	Buffer							buffer		(m_context, usageFlags, testBuffer->bufferSize(), useRef);
	std::unique_ptr<Buffer>			auxBuffer;

	if (useRef)
	{
		// Pass the main buffer address inside a uniform buffer.
		const VkBufferDeviceAddressInfo addressInfo =
		{
			VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	//	VkStructureType	sType;
			nullptr,										//	const void*		pNext;
			buffer.getBuffer(),								//	VkBuffer		buffer;
		};
		const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);

		auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
		deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
		auxBuffer->flush();
	}

	testBuffer->setBuffer(buffer.getHostPtr());
	testBuffer->fillWithTestData(rnd);

	buffer.flush();

	Move<VkDescriptorSetLayout>	extraResourcesLayout;
	Move<VkDescriptorPool>		extraResourcesSetPool;
	Move<VkDescriptorSet>		extraResourcesSet;

	const VkDescriptorSetLayoutBinding bindings[] =
	{
		{ 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
	};

	const VkDescriptorSetLayoutCreateInfo layoutInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		DE_NULL,
		(VkDescriptorSetLayoutCreateFlags)0u,
		DE_LENGTH_OF_ARRAY(bindings),
		bindings
	};

	extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);

	const VkDescriptorPoolSize poolSizes[] =
	{
		{ descType, 1u }
	};

	const VkDescriptorPoolCreateInfo poolInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
		DE_NULL,
		(VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
		1u,		// maxSets
		DE_LENGTH_OF_ARRAY(poolSizes),
		poolSizes
	};

	extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*extraResourcesSetPool,
		1u,
		&extraResourcesLayout.get()
	};

	extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);

	VkDescriptorBufferInfo bufferInfo;
	bufferInfo.buffer	= (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
	bufferInfo.offset	= 0u;
	bufferInfo.range	= VK_WHOLE_SIZE;

	const VkWriteDescriptorSet descriptorWrite =
	{
		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		DE_NULL,
		*extraResourcesSet,
		0u,		// dstBinding
		0u,		// dstArrayElement
		1u,
		descType,
		(const VkDescriptorImageInfo*)DE_NULL,
		&bufferInfo,
		(const VkBufferView*)DE_NULL
	};

	vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);

	// Storage for output varying data.
	std::vector<deUint32>	outputs		(NUM_ELEMENTS);
	std::vector<void*>		outputPtr	(NUM_ELEMENTS);

	for (size_t i = 0; i < NUM_ELEMENTS; i++)
	{
		outputs[i] = 0xcdcdcdcd;
		outputPtr[i] = &outputs[i];
	}

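	// Shared memory variants run every invocation inside a single workgroup (createShaderSpec sets localSizeX to
	// NUM_ELEMENTS for them); the other variants request one execution per element.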
	const int					numWorkGroups	= ((m_shaderType.getMemoryType() == AtomicMemoryType::SHARED) ? 1 : static_cast<int>(NUM_ELEMENTS));
	UniquePtr<ShaderExecutor>	executor		(createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));

	executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
	buffer.invalidate();

	tcu::ResultCollector resultCollector(log);

	// Check the results of the atomic operation.
	testBuffer->checkResults(resultCollector);

	return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
}

class AtomicOperationCase : public TestCase
{
public:
							AtomicOperationCase		(tcu::TestContext&	testCtx,
													 const char*		name,
													 const char*		description,
													 AtomicShaderType	type,
													 DataType			dataType,
													 AtomicOperation	atomicOp);
	virtual					~AtomicOperationCase	(void);

	virtual TestInstance*	createInstance			(Context& ctx) const;
	virtual void			checkSupport			(Context& ctx) const;
	virtual void			initPrograms			(vk::SourceCollections& programCollection) const
	{
		generateSources(m_shaderType.getType(), m_shaderSpec, programCollection);
	}

private:

	void					createShaderSpec		();
	ShaderSpec				m_shaderSpec;
	const AtomicShaderType	m_shaderType;
	const DataType			m_dataType;
	const AtomicOperation	m_atomicOp;
};

AtomicOperationCase::AtomicOperationCase (tcu::TestContext&	testCtx,
										  const char*		name,
										  const char*		description,
										  AtomicShaderType	shaderType,
										  DataType			dataType,
										  AtomicOperation	atomicOp)
	: TestCase		(testCtx, name, description)
	, m_shaderType	(shaderType)
	, m_dataType	(dataType)
	, m_atomicOp	(atomicOp)
{
	createShaderSpec();
	init();
}

AtomicOperationCase::~AtomicOperationCase (void)
{
}

TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
{
	return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
}

void AtomicOperationCase::checkSupport (Context& ctx) const
{
	if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
	{
		ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");

		const auto atomicInt64Features	= ctx.getShaderAtomicInt64Features();
		const bool isSharedMemory		= (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED);

		if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
		}
		if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
		}
	}

	if (m_dataType == DATA_TYPE_FLOAT32)
	{
		ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
		if (m_atomicOp == ATOMIC_OP_ADD)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_EXCHANGE)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
				}
			}
		}
	}

	if (m_dataType == DATA_TYPE_FLOAT64)
	{
		ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
		if (m_atomicOp == ATOMIC_OP_ADD)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_EXCHANGE)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
				}
			}
		}
	}

	if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
	{
		ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
	}

	// Check stores and atomic operation support.
	switch (m_shaderType.getType())
	{
	case glu::SHADERTYPE_VERTEX:
	case glu::SHADERTYPE_TESSELLATION_CONTROL:
	case glu::SHADERTYPE_TESSELLATION_EVALUATION:
	case glu::SHADERTYPE_GEOMETRY:
		if (!ctx.getDeviceFeatures().vertexPipelineStoresAndAtomics)
			TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in vertex, tessellation and geometry shaders.");
		break;
	case glu::SHADERTYPE_FRAGMENT:
		if (!ctx.getDeviceFeatures().fragmentStoresAndAtomics)
			TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in fragment shaders.");
		break;
	case glu::SHADERTYPE_COMPUTE:
		break;
	default:
		DE_FATAL("Unsupported shader type");
	}

	checkSupportShader(ctx, m_shaderType.getType());
}

void AtomicOperationCase::createShaderSpec (void)
{
	const AtomicMemoryType memoryType = m_shaderType.getMemoryType();

	// Global declarations.
	std::ostringstream shaderTemplateGlobalStream;

	// Structure in use for atomic operations.
	shaderTemplateGlobalStream
		<< "${EXTENSIONS}\n"
		<< "\n"
		<< "struct AtomicStruct\n"
		<< "{\n"
		<< "    ${DATATYPE} inoutValues[${N}/2];\n"
		<< "    ${DATATYPE} inputValues[${N}];\n"
		<< "    ${DATATYPE} compareValues[${N}];\n"
		<< "    ${DATATYPE} outputValues[${N}];\n"
		<< "    int invocationHitCount[${N}];\n"
		<< "    int index;\n"
		<< "};\n"
		<< "\n"
		;

	// The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
	// as "buf.data", which is the name used in the atomic operation statements.
	//
	// * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
	// * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
	// * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
	//
	if (memoryType != AtomicMemoryType::REFERENCE)
	{
		shaderTemplateGlobalStream
			<< "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
			<< "    AtomicStruct data;\n"
			<< "} ${RESULT_BUFFER_NAME};\n"
			<< "\n"
			;

		// When using global shared memory in the compute variant, invocations will use a shared global structure instead of a
		// descriptor set as the sources and results of each tested operation.
		if (memoryType == AtomicMemoryType::SHARED)
		{
			shaderTemplateGlobalStream
				<< "shared struct { AtomicStruct data; } buf;\n"
				<< "\n"
				;
		}
	}
	else
	{
		shaderTemplateGlobalStream
			<< "layout (buffer_reference) buffer AtomicBuffer {\n"
			<< "    AtomicStruct data;\n"
			<< "};\n"
			<< "\n"
			<< "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
			<< "    AtomicBuffer buf;\n"
			<< "};\n"
			<< "\n"
			;
	}

	const auto					shaderTemplateGlobalString	= shaderTemplateGlobalStream.str();
	const tcu::StringTemplate	shaderTemplateGlobal		(shaderTemplateGlobalString);

	// Shader body for the non-vertex case.
	std::ostringstream nonVertexShaderTemplateStream;

	if (memoryType == AtomicMemoryType::SHARED)
	{
		// Invocation zero will initialize the shared structure from the descriptor set.
		nonVertexShaderTemplateStream
			<< "if (gl_LocalInvocationIndex == 0u)\n"
			<< "{\n"
			<< "    buf.data = ${RESULT_BUFFER_NAME}.data;\n"
			<< "}\n"
			<< "barrier();\n"
			;
	}

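	// Fragment shaders guard against helper invocations so that each output slot is claimed by exactly one real
	// invocation.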
	if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
	{
		nonVertexShaderTemplateStream
			<< "if (!gl_HelperInvocation) {\n"
			<< "    int idx = atomicAdd(buf.data.index, 1);\n"
			<< "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
			<< "}\n"
			;
	}
	else
	{
		nonVertexShaderTemplateStream
			<< "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
			<< "{\n"
			<< "    int idx = atomicAdd(buf.data.index, 1);\n"
			<< "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
			<< "}\n"
			;
	}

	if (memoryType == AtomicMemoryType::SHARED)
	{
		// Invocation zero will copy results back to the descriptor set.
		nonVertexShaderTemplateStream
			<< "barrier();\n"
			<< "if (gl_LocalInvocationIndex == 0u)\n"
			<< "{\n"
			<< "    ${RESULT_BUFFER_NAME}.data = buf.data;\n"
			<< "}\n"
			;
	}

	const auto					nonVertexShaderTemplateStreamStr	= nonVertexShaderTemplateStream.str();
	const tcu::StringTemplate	nonVertexShaderTemplateSrc			(nonVertexShaderTemplateStreamStr);

	// Shader body for the vertex case.
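	// A vertex may be shaded more than once, so the hit-count guard below ensures each index performs its atomic
	// operation exactly once.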
	const tcu::StringTemplate vertexShaderTemplateSrc(
		"int idx = gl_VertexIndex;\n"
		"if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
		"{\n"
		"    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
		"}\n");
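	// For illustration, with a plain buffer, ${ATOMICOP} = atomicAdd, ${N} = 32, an empty ${COMPARE_ARG} and
	// ${RESULT_BUFFER_NAME} = "buf", the non-vertex body above specializes roughly to:
	//
	//     if (atomicAdd(buf.data.invocationHitCount[0], 1) < 32)
	//     {
	//         int idx = atomicAdd(buf.data.index, 1);
	//         buf.data.outputValues[idx] = atomicAdd(buf.data.inoutValues[idx % (32/2)], buf.data.inputValues[idx]);
	//     }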
1130
1131         // Extensions.
1132         std::ostringstream extensions;
1133
1134         if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1135         {
1136                 extensions
1137                         << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
1138                         << "#extension GL_EXT_shader_atomic_int64 : enable\n"
1139                         ;
1140         }
1141         else if ((m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
1142         {
1143                 extensions
1144                         << "#extension GL_EXT_shader_atomic_float : enable\n"
1145                         << "#extension GL_KHR_memory_scope_semantics : enable\n"
1146                         ;
1147         }
1148
1149         if (memoryType == AtomicMemoryType::REFERENCE)
1150         {
1151                 extensions << "#extension GL_EXT_buffer_reference : require\n";
1152         }
1153

    // Specializations.
    std::map<std::string, std::string> specializations;

    specializations["EXTENSIONS"]         = extensions.str();
    specializations["DATATYPE"]           = dataType2Str(m_dataType);
    specializations["ATOMICOP"]           = atomicOp2Str(m_atomicOp);
    specializations["SETIDX"]             = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
    specializations["N"]                  = de::toString((int)NUM_ELEMENTS);
    specializations["COMPARE_ARG"]        = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
    specializations["RESULT_BUFFER_NAME"] = ((memoryType == AtomicMemoryType::SHARED) ? "result" : "buf");
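
    // COMPARE_ARG splices in the extra comparison operand that only compare-and-swap
    // takes. Assuming atomicOp2Str(ATOMIC_OP_COMP_SWAP) maps to GLSL's atomicCompSwap,
    // the template line expands to roughly:
    //     buf.data.outputValues[idx] = atomicCompSwap(buf.data.inoutValues[idx % (N/2)], buf.data.compareValues[idx], buf.data.inputValues[idx]);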

    // Shader spec.
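    // Note: "outData" appears to be a placeholder output for the shader executor
    // framework; the atomic results themselves are read back from the SSBO.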
    m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
    m_shaderSpec.glslVersion        = glu::GLSL_VERSION_450;
    m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
    m_shaderSpec.source             = ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
                                        ? vertexShaderTemplateSrc.specialize(specializations)
                                        : nonVertexShaderTemplateSrc.specialize(specializations));

    if (memoryType == AtomicMemoryType::SHARED)
    {
        // When using global shared memory, use a single workgroup and an appropriate number of local invocations.
        m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
    }
}

void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
{
    tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();

    static const struct
    {
        glu::ShaderType type;
        const char*     name;
    } shaderTypes[] =
    {
        { glu::SHADERTYPE_VERTEX,                  "vertex"    },
        { glu::SHADERTYPE_FRAGMENT,                "fragment"  },
        { glu::SHADERTYPE_GEOMETRY,                "geometry"  },
        { glu::SHADERTYPE_TESSELLATION_CONTROL,    "tess_ctrl" },
        { glu::SHADERTYPE_TESSELLATION_EVALUATION, "tess_eval" },
        { glu::SHADERTYPE_COMPUTE,                 "compute"   },
    };

    static const struct
    {
        AtomicMemoryType type;
        const char*      suffix;
    } kMemoryTypes[] =
    {
        { AtomicMemoryType::BUFFER,    ""           },
        { AtomicMemoryType::SHARED,    "_shared"    },
        { AtomicMemoryType::REFERENCE, "_reference" },
    };

    static const struct
    {
        DataType    dataType;
        const char* name;
        const char* description;
    } dataSign[] =
    {
        { DATA_TYPE_INT32,   "signed",        "Tests using signed data (int)"             },
        { DATA_TYPE_UINT32,  "unsigned",      "Tests using unsigned data (uint)"          },
        { DATA_TYPE_FLOAT32, "float32",       "Tests using 32-bit float data"             },
        { DATA_TYPE_INT64,   "signed64bit",   "Tests using 64-bit signed data (int64)"    },
        { DATA_TYPE_UINT64,  "unsigned64bit", "Tests using 64-bit unsigned data (uint64)" },
        { DATA_TYPE_FLOAT64, "float64",       "Tests using 64-bit float data"             }
    };

    static const struct
    {
        AtomicOperation value;
        const char*     name;
    } atomicOp[] =
    {
        { ATOMIC_OP_EXCHANGE,  "exchange"  },
        { ATOMIC_OP_COMP_SWAP, "comp_swap" },
        { ATOMIC_OP_ADD,       "add"       },
        { ATOMIC_OP_MIN,       "min"       },
        { ATOMIC_OP_MAX,       "max"       },
        { ATOMIC_OP_AND,       "and"       },
        { ATOMIC_OP_OR,        "or"        },
        { ATOMIC_OP_XOR,       "xor"       }
    };

    for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
    {
        for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
        {
            for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
            {
                // Only ADD and EXCHANGE are supported for floating-point data types.
                if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
                {
                    if (atomicOp[opNdx].value != ATOMIC_OP_ADD && atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
                    {
                        continue;
                    }
                }

                for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
                {
                    // Shared memory is only available in compute shaders.
                    if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE)
                        continue;

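                    // Generated names combine operation, data sign, shader stage and
                    // memory-type suffix, e.g. "add_signed_compute_shared".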
                    const std::string description = std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
                    const std::string name        = std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;

                    atomicOperationTestsGroup->addChild(new AtomicOperationCase(
                        testCtx, name.c_str(), description.c_str(),
                        AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type),
                        dataSign[signNdx].dataType, atomicOp[opNdx].value));
                }
            }
        }
    }
}

} // anonymous

tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
{
    return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);
}

} // shaderexecutor
} // vkt