Merge pull request #276 from Ella-0/master
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / shaderexecutor / vktAtomicOperationTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2017 Google Inc.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Atomic operations (OpAtomic*) tests.
23  *//*--------------------------------------------------------------------*/
24
25 #include "vktAtomicOperationTests.hpp"
26 #include "vktShaderExecutor.hpp"
27
28 #include "vkRefUtil.hpp"
29 #include "vkMemUtil.hpp"
30 #include "vkQueryUtil.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vktTestGroupUtil.hpp"
35
36 #include "tcuTestLog.hpp"
37 #include "tcuStringTemplate.hpp"
38 #include "tcuResultCollector.hpp"
39
40 #include "deFloat16.h"
41 #include "deMath.hpp"
42 #include "deStringUtil.hpp"
43 #include "deSharedPtr.hpp"
44 #include "deRandom.hpp"
45 #include "deArrayUtil.hpp"
46
47 #include <string>
48 #include <memory>
49 #include <cmath>
50
51 namespace vkt
52 {
53 namespace shaderexecutor
54 {
55
56 namespace
57 {
58
59 using de::UniquePtr;
60 using de::MovePtr;
61 using std::vector;
62
63 using namespace vk;
64
// Kind of memory the shader's atomic operations act on. Drives both shader
// generation and which device features the test requires.
enum class AtomicMemoryType
{
	BUFFER = 0,	// Normal buffer.
	SHARED,		// Shared global struct in a compute workgroup.
	REFERENCE,	// Buffer passed as a reference.
};
71
// Helper struct to indicate the shader type and if it should use shared global memory.
// Immutable value type pairing a shader stage with the memory the atomics target.
class AtomicShaderType
{
public:
	AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
		: m_type				(type)
		, m_atomicMemoryType	(memoryType)
	{
		// Shared global memory can only be set to true with compute shaders.
		DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE);
	}

	glu::ShaderType		getType					(void) const	{ return m_type; }
	AtomicMemoryType	getMemoryType			(void) const	{ return m_atomicMemoryType; }

private:
	glu::ShaderType		m_type;
	AtomicMemoryType	m_atomicMemoryType;
};
91
// Buffer helper: a host-visible Vulkan buffer together with its bound
// allocation. flush()/invalidate() handle host<->device coherency.
class Buffer
{
public:
						// useRef requests a device-address-capable allocation (buffer reference tests).
						Buffer				(Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);

	VkBuffer			getBuffer			(void) const { return *m_buffer;					}
	void*				getHostPtr			(void) const { return m_allocation->getHostPtr();	}
	void				flush				(void);
	void				invalidate			(void);

private:
	const DeviceInterface&		m_vkd;
	const VkDevice				m_device;
	const VkQueue				m_queue;		// Used by invalidate() to submit the barrier.
	const deUint32				m_queueIndex;
	const Unique<VkBuffer>		m_buffer;
	const UniquePtr<Allocation>	m_allocation;
};
111
112 typedef de::SharedPtr<Buffer> BufferSp;
113
114 Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
115 {
116         const VkBufferCreateInfo createInfo     =
117         {
118                 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
119                 DE_NULL,
120                 (VkBufferCreateFlags)0,
121                 size,
122                 usageFlags,
123                 VK_SHARING_MODE_EXCLUSIVE,
124                 0u,
125                 DE_NULL
126         };
127         return createBuffer(vkd, device, &createInfo);
128 }
129
130 MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
131 {
132         const MemoryRequirement allocationType = (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
133         MovePtr<Allocation>     alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));
134
135         VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));
136
137         return alloc;
138 }
139
// Creates the VkBuffer and binds a fresh host-visible allocation to it.
// useRef additionally makes the allocation device-address capable.
Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
	: m_vkd			(context.getDeviceInterface())
	, m_device		(context.getDevice())
	, m_queue		(context.getUniversalQueue())
	, m_queueIndex	(context.getUniversalQueueFamilyIndex())
	, m_buffer		(createBuffer			(context.getDeviceInterface(),
											 context.getDevice(),
											 (VkDeviceSize)size,
											 usage))
	, m_allocation	(allocateAndBindMemory	(context.getDeviceInterface(),
											 context.getDevice(),
											 context.getDefaultAllocator(),
											 *m_buffer,
											 useRef))
{
}
156
// Flushes the whole mapped range so host writes become visible to the device.
void Buffer::flush (void)
{
	flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}
161
// Makes device writes to this buffer visible to the host: records a one-shot
// command buffer with a device-to-host buffer memory barrier, submits it and
// waits, then invalidates the mapped memory range for non-coherent memory.
void Buffer::invalidate (void)
{
	const auto	cmdPool			= vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
	const auto	cmdBufferPtr	= vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
	const auto	cmdBuffer		= cmdBufferPtr.get();
	// All prior device writes -> host reads, over the entire buffer.
	const auto	bufferBarrier	= vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);

	beginCommandBuffer(m_vkd, cmdBuffer);
	m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
	endCommandBuffer(m_vkd, cmdBuffer);
	submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);

	invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}
176
177 // Tests
178
// Atomic operations under test. Order must match the name table in
// atomicOp2Str().
enum AtomicOperation
{
	ATOMIC_OP_EXCHANGE = 0,
	ATOMIC_OP_COMP_SWAP,
	ATOMIC_OP_ADD,
	ATOMIC_OP_MIN,
	ATOMIC_OP_MAX,
	ATOMIC_OP_AND,
	ATOMIC_OP_OR,
	ATOMIC_OP_XOR,

	ATOMIC_OP_LAST
};
192
// Maps an AtomicOperation to its GLSL function name.
std::string atomicOp2Str (AtomicOperation op)
{
	// Keep in sync with the AtomicOperation enum ordering; the lookup asserts
	// the array size matches ATOMIC_OP_LAST.
	static const char* const s_names[] =
	{
		"atomicExchange",
		"atomicCompSwap",
		"atomicAdd",
		"atomicMin",
		"atomicMax",
		"atomicAnd",
		"atomicOr",
		"atomicXor"
	};
	return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
}
208
enum
{
	// Number of atomic invocations per test; inout[] uses half this count so
	// each element is hit by two overlapping operations.
	NUM_ELEMENTS = 32
};
213
// Scalar types the atomics are tested on. Order must match the name table in
// dataType2Str().
enum DataType
{
	DATA_TYPE_FLOAT16 = 0,
	DATA_TYPE_INT32,
	DATA_TYPE_UINT32,
	DATA_TYPE_FLOAT32,
	DATA_TYPE_INT64,
	DATA_TYPE_UINT64,
	DATA_TYPE_FLOAT64,

	DATA_TYPE_LAST
};
226
// Maps a DataType to its GLSL type name.
std::string dataType2Str(DataType type)
{
	// Keep in sync with the DataType enum ordering.
	static const char* const s_names[] =
	{
		"float16_t",
		"int",
		"uint",
		"float",
		"int64_t",
		"uint64_t",
		"double",
	};
	return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
}
241
// Type-erased view of a test's storage buffer: fills it with inputs, sizes
// it, and verifies the results, independent of the concrete element type.
class BufferInterface
{
public:
	// Points the implementation at the host-mapped buffer memory (not owned).
	virtual void setBuffer(void* ptr) = 0;

	// Size in bytes of the buffer contents (shader block layout).
	virtual size_t bufferSize() = 0;

	// Writes randomized inputs (plus any special cases) into the buffer.
	virtual void fillWithTestData(de::Random &rnd) = 0;

	// Compares buffer contents after execution against expected outcomes.
	virtual void checkResults(tcu::ResultCollector& resultCollector) = 0;

	virtual ~BufferInterface() {};
};
255
// Integer test buffer: holds the host view of the SSBO and verifies integer
// atomic results with exact bitwise comparison.
template<typename dataTypeT>
class TestBuffer : public BufferInterface
{
public:

	TestBuffer(AtomicOperation	atomicOp)
		: m_atomicOp(atomicOp)
	{}

	// Memory layout of the storage buffer as seen by the shader; must match
	// the block layout declared in the generated shader source.
	template<typename T>
	struct BufferData
	{
		// Use half the number of elements for inout to cause overlap between atomic operations.
		// Each inout element at index i will have two atomic operations using input from
		// indices i and i + NUM_ELEMENTS / 2.
		T			inout[NUM_ELEMENTS / 2];
		T			input[NUM_ELEMENTS];
		T			compare[NUM_ELEMENTS];	// Comparison values; only meaningful for atomicCompSwap.
		T			output[NUM_ELEMENTS];	// Pre-operation memory contents returned by each atomic call.
		T			invocationHitCount[NUM_ELEMENTS];
		deInt32		index;
	};

	virtual void setBuffer(void* ptr)
	{
		m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
	}

	virtual size_t bufferSize()
	{
		return sizeof(BufferData<dataTypeT>);
	}

	virtual void fillWithTestData(de::Random &rnd)
	{
		// Recognizable garbage pattern so untouched output slots are obvious.
		dataTypeT pattern;
		deMemset(&pattern, 0xcd, sizeof(dataTypeT));

		for (int i = 0; i < NUM_ELEMENTS / 2; i++)
		{
			m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
			// The first half of compare elements match with every even index.
			// The second half matches with odd indices. This causes the
			// overlapping operations to only select one.
			m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
			m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
		}
		for (int i = 0; i < NUM_ELEMENTS; i++)
		{
			m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
			m_ptr->output[i] = pattern;
			m_ptr->invocationHitCount[i] = 0;
		}
		m_ptr->index = 0;

		// Take a copy to be used when calculating expected values.
		m_original = *m_ptr;
	}

	virtual void checkResults(tcu::ResultCollector& resultCollector)
	{
		checkOperation(m_original, *m_ptr, resultCollector);
	}

	// One allowed outcome: the final inout value plus the two per-operation
	// return values for one possible execution order.
	template<typename T>
	struct Expected
	{
		T m_inout;
		T m_output[2];

		Expected (T inout, T output0, T output1)
		: m_inout(inout)
		{
			m_output[0] = output0;
			m_output[1] = output1;
		}

		// Bitwise comparison (deMemCmp) so signed/unsigned reinterpretation
		// of the same bits compares consistently.
		bool compare (T inout, T output0, T output1)
		{
			return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
					&& deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
					&& deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
		}
	};

	// Defined below; verifies 'result' against both execution orders.
	void checkOperation	(const BufferData<dataTypeT>&	original,
						 const BufferData<dataTypeT>&	result,
						 tcu::ResultCollector&			resultCollector);

	const AtomicOperation	m_atomicOp;

	BufferData<dataTypeT>* m_ptr;		// Host-mapped buffer contents (not owned).
	BufferData<dataTypeT>  m_original;	// Snapshot taken after filling, used for expectations.

};
351
// Tolerant equality: any two NaNs (quiet or signaling) compare equal, a NaN
// never equals a number, and numbers compare with a loose epsilon in double
// precision.
template<typename T>
bool nanSafeSloppyEquals(T x, T y)
{
	const bool xIsNaN = deIsIEEENaN(x);
	const bool yIsNaN = deIsIEEENaN(y);

	if (xIsNaN || yIsNaN)
		return xIsNaN && yIsNaN;

	return fabs(deToDouble(x) - deToDouble(y)) < 0.00001;
}
363
364 template<typename dataTypeT>
365 class TestBufferFloatingPoint : public BufferInterface
366 {
367 public:
368
369         TestBufferFloatingPoint(AtomicOperation atomicOp)
370                 : m_atomicOp(atomicOp)
371         {}
372
373         template<typename T>
374         struct BufferDataFloatingPoint
375         {
376                 // Use half the number of elements for inout to cause overlap between atomic operations.
377                 // Each inout element at index i will have two atomic operations using input from
378                 // indices i and i + NUM_ELEMENTS / 2.
379                 T                       inout[NUM_ELEMENTS / 2];
380                 T                       input[NUM_ELEMENTS];
381                 T                       compare[NUM_ELEMENTS];
382                 T                       output[NUM_ELEMENTS];
383                 deInt32         invocationHitCount[NUM_ELEMENTS];
384                 deInt32         index;
385         };
386
387         virtual void setBuffer(void* ptr)
388         {
389                 m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
390         }
391
392         virtual size_t bufferSize()
393         {
394                 return sizeof(BufferDataFloatingPoint<dataTypeT>);
395         }
396
397         virtual void fillWithTestData(de::Random& rnd)
398         {
399                 dataTypeT pattern;
400                 deMemset(&pattern, 0xcd, sizeof(dataTypeT));
401
402                 for (int i = 0; i < NUM_ELEMENTS / 2; i++)
403                 {
404                         m_ptr->inout[i] = deToFloatType<dataTypeT>(rnd.getFloat());
405                         // These aren't used by any of the float tests
406                         m_ptr->compare[i] = deToFloatType<dataTypeT>(0.0);
407                 }
408                 // Add special cases for NaN and +/-0
409                 // 0: min(sNaN, x)
410                 m_ptr->inout[0] = deSignalingNaN<dataTypeT>();
411                 // 1: min(x, sNaN)
412                 m_ptr->input[1 * 2 + 0] = deSignalingNaN<dataTypeT>();
413                 // 2: min(qNaN, x)
414                 m_ptr->inout[2] = deQuietNaN<dataTypeT>();
415                 // 3: min(x, qNaN)
416                 m_ptr->input[3 * 2 + 0] = deQuietNaN<dataTypeT>();
417                 // 4: min(NaN, NaN)
418                 m_ptr->inout[4] = deSignalingNaN<dataTypeT>();
419                 m_ptr->input[4 * 2 + 0] = deQuietNaN<dataTypeT>();
420                 m_ptr->input[4 * 2 + 1] = deQuietNaN<dataTypeT>();
421                 // 5: min(+0, -0)
422                 m_ptr->inout[5] = deToFloatType<dataTypeT>(-0.0);
423                 m_ptr->input[5 * 2 + 0] = deToFloatType<dataTypeT>(0.0);
424                 m_ptr->input[5 * 2 + 1] = deToFloatType<dataTypeT>(0.0);
425
426                 for (int i = 0; i < NUM_ELEMENTS; i++)
427                 {
428                         m_ptr->input[i] = deToFloatType<dataTypeT>(rnd.getFloat());
429                         m_ptr->output[i] = pattern;
430                         m_ptr->invocationHitCount[i] = 0;
431                 }
432
433                 m_ptr->index = 0;
434
435                 // Take a copy to be used when calculating expected values.
436                 m_original = *m_ptr;
437         }
438
439         virtual void checkResults(tcu::ResultCollector& resultCollector)
440         {
441                 checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
442         }
443
444         template<typename T>
445         struct Expected
446         {
447                 T m_inout;
448                 T m_output[2];
449
450                 Expected(T inout, T output0, T output1)
451                         : m_inout(inout)
452                 {
453                         m_output[0] = output0;
454                         m_output[1] = output1;
455                 }
456
457                 bool compare(T inout, T output0, T output1)
458                 {
459                         return nanSafeSloppyEquals(m_inout, inout) &&
460                                nanSafeSloppyEquals(m_output[0], output0) &&
461                                nanSafeSloppyEquals(m_output[1], output1);
462                 }
463         };
464
465         void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT>& original,
466                 const BufferDataFloatingPoint<dataTypeT>& result,
467                 tcu::ResultCollector& resultCollector);
468
469         const AtomicOperation   m_atomicOp;
470
471         BufferDataFloatingPoint<dataTypeT>* m_ptr;
472         BufferDataFloatingPoint<dataTypeT>  m_original;
473
474 };
475
476 static BufferInterface* createTestBuffer(DataType type, AtomicOperation atomicOp)
477 {
478         switch (type)
479         {
480         case DATA_TYPE_FLOAT16:
481                 return new TestBufferFloatingPoint<deFloat16>(atomicOp);
482         case DATA_TYPE_INT32:
483                 return new TestBuffer<deInt32>(atomicOp);
484         case DATA_TYPE_UINT32:
485                 return new TestBuffer<deUint32>(atomicOp);
486         case DATA_TYPE_FLOAT32:
487                 return new TestBufferFloatingPoint<float>(atomicOp);
488         case DATA_TYPE_INT64:
489                 return new TestBuffer<deInt64>(atomicOp);
490         case DATA_TYPE_UINT64:
491                 return new TestBuffer<deUint64>(atomicOp);
492         case DATA_TYPE_FLOAT64:
493                 return new TestBufferFloatingPoint<double>(atomicOp);
494         default:
495                 DE_ASSERT(false);
496                 return DE_NULL;
497         }
498 }
499
// Use template to handle both signed and unsigned cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBuffer<T>::checkOperation (const BufferData<T>&	original,
									const BufferData<T>&	result,
									tcu::ResultCollector&	resultCollector)
{
	// originalInout = original inout
	// input0 = input at index i
	// input1 = input at index i + NUM_ELEMENTS / 2
	//
	// atomic operation will return the memory contents before
	// the operation and this is stored as output. Two operations
	// are executed for each InOut value (using input0 and input1).
	//
	// Since there is an overlap of two operations per each
	// InOut element, the outcome of the resulting InOut and
	// the outputs of the operations have two result candidates
	// depending on the execution order. Verification passes
	// if the results match one of these options.

	for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
	{
		// Needed when reinterpeting the data as signed values.
		const T originalInout	= *reinterpret_cast<const T*>(&original.inout[elementNdx]);
		const T input0			= *reinterpret_cast<const T*>(&original.input[elementNdx]);
		const T input1			= *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

		// Expected results are collected to this vector.
		// Each case pushes exactly two candidates: one per execution order
		// (input0 applied first, or input1 applied first).
		vector<Expected<T> > exp;

		switch (m_atomicOp)
		{
			case ATOMIC_OP_ADD:
			{
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
			}
			break;

			case ATOMIC_OP_AND:
			{
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
			}
			break;

			case ATOMIC_OP_OR:
			{
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
			}
			break;

			case ATOMIC_OP_XOR:
			{
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
			}
			break;

			case ATOMIC_OP_MIN:
			{
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_MAX:
			{
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_EXCHANGE:
			{
				exp.push_back(Expected<T>(input1, originalInout, input0));
				exp.push_back(Expected<T>(input0, input1, originalInout));
			}
			break;

			case ATOMIC_OP_COMP_SWAP:
			{
				// compare[] was filled so that exactly one of the two
				// overlapping swaps succeeds (even indices: input0 side,
				// odd indices: input1 side); see fillWithTestData().
				if (elementNdx % 2 == 0)
				{
					exp.push_back(Expected<T>(input0, originalInout, input0));
					exp.push_back(Expected<T>(input0, originalInout, originalInout));
				}
				else
				{
					exp.push_back(Expected<T>(input1, input1, originalInout));
					exp.push_back(Expected<T>(input1, originalInout, originalInout));
				}
			}
			break;


			default:
				DE_FATAL("Unexpected atomic operation.");
				break;
		};

		const T resIo		= result.inout[elementNdx];
		const T resOutput0	= result.output[elementNdx];
		const T resOutput1	= result.output[elementNdx + NUM_ELEMENTS / 2];


		if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
		{
			std::ostringstream errorMessage;
			errorMessage	<< "ERROR: Result value check failed at index " << elementNdx
							<< ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
							<< ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
							<< tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
							<< " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";

			resultCollector.fail(errorMessage.str());
		}
	}
}
625
626 template<typename T>
627 void handleExceptionalFloatMinMaxValues(vector<T> &values, T x, T y)
628 {
629
630         if (deIsSignalingNaN(x) && deIsSignalingNaN(y))
631         {
632                 values.push_back(deQuietNaN<T>());
633                 values.push_back(deSignalingNaN<T>());
634         }
635         else if (deIsSignalingNaN(x))
636         {
637                 values.push_back(deQuietNaN<T>());
638                 values.push_back(deSignalingNaN<T>());
639                 if (!deIsIEEENaN(y))
640                         values.push_back(y);
641         }
642         else if (deIsSignalingNaN(y))
643         {
644                 values.push_back(deQuietNaN<T>());
645                 values.push_back(deSignalingNaN<T>());
646                 if (!deIsIEEENaN(x))
647                         values.push_back(x);
648         }
649         else if (deIsIEEENaN(x) && deIsIEEENaN(y))
650         {
651                 // Both quiet NaNs
652                 values.push_back(deQuietNaN<T>());
653         }
654         else if (deIsIEEENaN(x))
655         {
656                 // One quiet NaN and one non-NaN.
657                 values.push_back(y);
658         }
659         else if (deIsIEEENaN(y))
660         {
661                 // One quiet NaN and one non-NaN.
662                 values.push_back(x);
663         }
664         else if ((deIsPositiveZero(x) && deIsNegativeZero(y)) || (deIsNegativeZero(x) && deIsPositiveZero(y)))
665         {
666                 values.push_back(deToFloatType<T>(0.0));
667                 values.push_back(deToFloatType<T>(-0.0));
668         }
669 }
670
671 template<typename T>
672 T floatAdd(T x, T y)
673 {
674         if (deIsIEEENaN(x) || deIsIEEENaN(y))
675                 return deQuietNaN<T>();
676         return deToFloatType<T>(deToDouble(x) + deToDouble(y));
677 }
678
679 template<typename T>
680 vector<T> floatMinValues(T x, T y)
681 {
682         vector<T> values;
683         handleExceptionalFloatMinMaxValues(values, x, y);
684         if (values.empty())
685         {
686                 values.push_back(deToDouble(x) < deToDouble(y) ? x : y);
687         }
688         return values;
689 }
690
691 template<typename T>
692 vector<T> floatMaxValues(T x, T y)
693 {
694         vector<T> values;
695         handleExceptionalFloatMinMaxValues(values, x, y);
696         if (values.empty())
697         {
698                 values.push_back(deToDouble(x) > deToDouble(y) ? x : y);
699         }
700         return values;
701 }
702
703 // Use template to handle both float and double cases. SPIR-V should
704 // have separate operations for both.
705 template<typename T>
706 void TestBufferFloatingPoint<T>::checkOperationFloatingPoint(const BufferDataFloatingPoint<T>& original,
707         const BufferDataFloatingPoint<T>& result,
708         tcu::ResultCollector& resultCollector)
709 {
710         // originalInout = original inout
711         // input0 = input at index i
712         // iinput1 = input at index i + NUM_ELEMENTS / 2
713         //
714         // atomic operation will return the memory contents before
715         // the operation and this is stored as output. Two operations
716         // are executed for each InOut value (using input0 and input1).
717         //
718         // Since there is an overlap of two operations per each
719         // InOut element, the outcome of the resulting InOut and
720         // the outputs of the operations have two result candidates
721         // depending on the execution order. Verification passes
722         // if the results match one of these options.
723
724         for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
725         {
726                 // Needed when reinterpeting the data as signed values.
727                 const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
728                 const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
729                 const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);
730
731                 // Expected results are collected to this vector.
732                 vector<Expected<T> > exp;
733
734                 switch (m_atomicOp)
735                 {
736                 case ATOMIC_OP_ADD:
737                 {
738                         exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), originalInout, floatAdd(originalInout, input0)));
739                         exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), floatAdd(originalInout, input1), originalInout));
740                 }
741                 break;
742
743                 case ATOMIC_OP_MIN:
744                 {
745                         // The case where input0 is combined first
746                         vector<T> minOriginalAndInput0 = floatMinValues(originalInout, input0);
747                         for (T x : minOriginalAndInput0)
748                         {
749                                 vector<T> minAll = floatMinValues(x, input1);
750                                 for (T y : minAll)
751                                 {
752                                         exp.push_back(Expected<T>(y, originalInout, x));
753                                 }
754                         }
755
756                         // The case where input1 is combined first
757                         vector<T> minOriginalAndInput1 = floatMinValues(originalInout, input1);
758                         for (T x : minOriginalAndInput1)
759                         {
760                                 vector<T> minAll = floatMinValues(x, input0);
761                                 for (T y : minAll)
762                                 {
763                                         exp.push_back(Expected<T>(y, x, originalInout));
764                                 }
765                         }
766                 }
767                 break;
768
769                 case ATOMIC_OP_MAX:
770                 {
771                         // The case where input0 is combined first
772                         vector<T> minOriginalAndInput0 = floatMaxValues(originalInout, input0);
773                         for (T x : minOriginalAndInput0)
774                         {
775                                 vector<T> minAll = floatMaxValues(x, input1);
776                                 for (T y : minAll)
777                                 {
778                                         exp.push_back(Expected<T>(y, originalInout, x));
779                                 }
780                         }
781
782                         // The case where input1 is combined first
783                         vector<T> minOriginalAndInput1 = floatMaxValues(originalInout, input1);
784                         for (T x : minOriginalAndInput1)
785                         {
786                                 vector<T> minAll = floatMaxValues(x, input0);
787                                 for (T y : minAll)
788                                 {
789                                         exp.push_back(Expected<T>(y, x, originalInout));
790                                 }
791                         }
792                 }
793                 break;
794
795                 case ATOMIC_OP_EXCHANGE:
796                 {
797                         exp.push_back(Expected<T>(input1, originalInout, input0));
798                         exp.push_back(Expected<T>(input0, input1, originalInout));
799                 }
800                 break;
801
802                 default:
803                         DE_FATAL("Unexpected atomic operation.");
804                         break;
805                 };
806
807                 const T resIo = result.inout[elementNdx];
808                 const T resOutput0 = result.output[elementNdx];
809                 const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];
810
811
812                 bool hasMatch = false;
813                 for (Expected<T> e : exp)
814                 {
815                         if (e.compare(resIo, resOutput0, resOutput1))
816                         {
817                                 hasMatch = true;
818                                 break;
819                         }
820                 }
821                 if (!hasMatch)
822                 {
823                         std::ostringstream errorMessage;
824                         errorMessage << "ERROR: Result value check failed at index " << elementNdx
825                                 << ". Expected one of the outcomes:";
826
827                         bool first = true;
828                         for (Expected<T> e : exp)
829                         {
830                                 if (!first)
831                                         errorMessage << ", or";
832                                 first = false;
833
834                                 errorMessage << " InOut = " << e.m_inout
835                                         << ", Output0 = " << e.m_output[0]
836                                         << ", Output1 = " << e.m_output[1];
837                         }
838
839                         errorMessage << ". Got: InOut = " << resIo
840                                 << ", Output0 = " << resOutput0
841                                 << ", Output1 = " << resOutput1
842                                 << ". Using Input0 = " << original.input[elementNdx]
843                                 << " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";
844
845                         resultCollector.fail(errorMessage.str());
846                 }
847         }
848 }
849
// Test instance running one atomic-operation case through the shader executor.
// The ShaderSpec is held by reference and must outlive this instance (it is
// owned by the corresponding AtomicOperationCase — see createInstance()).
class AtomicOperationCaseInstance : public TestInstance
{
public:
									AtomicOperationCaseInstance		(Context&			context,
																	 const ShaderSpec&	shaderSpec,
																	 AtomicShaderType	shaderType,
																	 DataType			dataType,
																	 AtomicOperation	atomicOp);

	// Runs the test once: sets up buffers/descriptors, executes and verifies.
	virtual tcu::TestStatus			iterate							(void);

private:
	const ShaderSpec&				m_shaderSpec;	// Borrowed reference; not owned.
	AtomicShaderType				m_shaderType;	// Shader stage + memory type (buffer/shared/reference).
	const DataType					m_dataType;		// Data type operated on by the atomic op.
	AtomicOperation					m_atomicOp;		// The atomic operation under test.

};
868
869 AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context&                              context,
870                                                                                                                   const ShaderSpec&             shaderSpec,
871                                                                                                                   AtomicShaderType              shaderType,
872                                                                                                                   DataType                              dataType,
873                                                                                                                   AtomicOperation               atomicOp)
874         : TestInstance  (context)
875         , m_shaderSpec  (shaderSpec)
876         , m_shaderType  (shaderType)
877         , m_dataType    (dataType)
878         , m_atomicOp    (atomicOp)
879 {
880 }
881
// Runs one atomic operation test:
//  1) Fill the main buffer with randomized test data for the chosen type/op.
//  2) Expose the buffer to the shader either directly as a storage-buffer
//     descriptor, or — for AtomicMemoryType::REFERENCE — indirectly, by writing
//     its device address into a small uniform buffer.
//  3) Execute the shader via the ShaderExecutor and verify the buffer contents
//     against all legal interleavings of the atomic operations.
tcu::TestStatus AtomicOperationCaseInstance::iterate(void)
{
	de::UniquePtr<BufferInterface>	testBuffer	(createTestBuffer(m_dataType, m_atomicOp));
	tcu::TestLog&					log			= m_context.getTestContext().getLog();
	const DeviceInterface&			vkd			= m_context.getDeviceInterface();
	const VkDevice					device		= m_context.getDevice();
	de::Random						rnd			(0x62a15e34);	// Fixed seed keeps the test data reproducible.
	const bool						useRef		= (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
	const VkDescriptorType			descType	= (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	const VkBufferUsageFlags		usageFlags	= (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));

	// The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
	// a uniform buffer. If not, it will be passed directly as a descriptor.
	Buffer							buffer		(m_context, usageFlags, testBuffer->bufferSize(), useRef);
	std::unique_ptr<Buffer>			auxBuffer;

	if (useRef)
	{
		// Pass the main buffer address inside a uniform buffer.
		const VkBufferDeviceAddressInfo addressInfo =
		{
			VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	//	VkStructureType	sType;
			nullptr,										//	const void*		pNext;
			buffer.getBuffer(),								//	VkBuffer		buffer;
		};
		const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);

		auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
		deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
		auxBuffer->flush();
	}

	// Generate the input data in host memory and make it visible to the device.
	testBuffer->setBuffer(buffer.getHostPtr());
	testBuffer->fillWithTestData(rnd);

	buffer.flush();

	Move<VkDescriptorSetLayout>	extraResourcesLayout;
	Move<VkDescriptorPool>		extraResourcesSetPool;
	Move<VkDescriptorSet>		extraResourcesSet;

	// Single binding at index 0: the storage buffer itself, or the uniform
	// buffer holding its device address when buffer references are used.
	const VkDescriptorSetLayoutBinding bindings[] =
	{
		{ 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
	};

	const VkDescriptorSetLayoutCreateInfo	layoutInfo	=
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		DE_NULL,
		(VkDescriptorSetLayoutCreateFlags)0u,
		DE_LENGTH_OF_ARRAY(bindings),
		bindings
	};

	extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);

	const VkDescriptorPoolSize poolSizes[] =
	{
		{ descType, 1u }
	};

	const VkDescriptorPoolCreateInfo poolInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
		DE_NULL,
		(VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
		1u,		// maxSets
		DE_LENGTH_OF_ARRAY(poolSizes),
		poolSizes
	};

	extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*extraResourcesSetPool,
		1u,
		&extraResourcesLayout.get()
	};

	extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);

	// Point the descriptor at whichever buffer the shader will actually bind.
	VkDescriptorBufferInfo bufferInfo;
	bufferInfo.buffer	= (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
	bufferInfo.offset	= 0u;
	bufferInfo.range	= VK_WHOLE_SIZE;

	const VkWriteDescriptorSet descriptorWrite =
	{
		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		DE_NULL,
		*extraResourcesSet,
		0u,		// dstBinding
		0u,		// dstArrayElement
		1u,
		descType,
		(const VkDescriptorImageInfo*)DE_NULL,
		&bufferInfo,
		(const VkBufferView*)DE_NULL
	};

	vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);

	// Storage for output varying data.
	std::vector<deUint32>	outputs		(NUM_ELEMENTS);
	std::vector<void*>		outputPtr	(NUM_ELEMENTS);

	// Pre-fill the outputs with a recognizable pattern so unwritten entries
	// are easy to spot when debugging.
	for (size_t i = 0; i < NUM_ELEMENTS; i++)
	{
		outputs[i] = 0xcdcdcdcd;
		outputPtr[i] = &outputs[i];
	}

	// Shared-memory variants run everything inside a single workgroup; other
	// variants use one invocation (work group) per element.
	const int					numWorkGroups	= ((m_shaderType.getMemoryType() == AtomicMemoryType::SHARED) ? 1 : static_cast<int>(NUM_ELEMENTS));
	UniquePtr<ShaderExecutor>	executor		(createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));

	executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
	buffer.invalidate();	// Make device writes visible to the host before checking.

	tcu::ResultCollector resultCollector(log);

	// Check the results of the atomic operation
	testBuffer->checkResults(resultCollector);

	return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
}
1011
// Test case for a single (shader type, data type, atomic operation)
// combination. Builds the shader spec up front (see createShaderSpec) and
// creates an AtomicOperationCaseInstance to execute it.
class AtomicOperationCase : public TestCase
{
public:
							AtomicOperationCase		(tcu::TestContext&		testCtx,
													 const char*			name,
													 const char*			description,
													 AtomicShaderType		type,
													 DataType				dataType,
													 AtomicOperation		atomicOp);
	virtual					~AtomicOperationCase	(void);

	virtual TestInstance*	createInstance			(Context& ctx) const;
	// Throws NotSupportedError unless the device supports the required
	// extensions/features for this data type, operation and shader stage.
	virtual void			checkSupport			(Context& ctx) const;
	virtual void			initPrograms			(vk::SourceCollections& programCollection) const
	{
		generateSources(m_shaderType.getType(), m_shaderSpec, programCollection);
	}

private:

	// Fills m_shaderSpec; called from the constructor before init().
	void					createShaderSpec();
	ShaderSpec				m_shaderSpec;
	const AtomicShaderType	m_shaderType;
	const DataType			m_dataType;
	const AtomicOperation	m_atomicOp;
};
1038
1039 AtomicOperationCase::AtomicOperationCase (tcu::TestContext&     testCtx,
1040                                                                                   const char*           name,
1041                                                                                   const char*           description,
1042                                                                                   AtomicShaderType      shaderType,
1043                                                                                   DataType                      dataType,
1044                                                                                   AtomicOperation       atomicOp)
1045         : TestCase                      (testCtx, name, description)
1046         , m_shaderType          (shaderType)
1047         , m_dataType            (dataType)
1048         , m_atomicOp            (atomicOp)
1049 {
1050         createShaderSpec();
1051         init();
1052 }
1053
// Nothing to release explicitly; all members clean up via their destructors.
AtomicOperationCase::~AtomicOperationCase (void)
{
}
1057
1058 TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
1059 {
1060         return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
1061 }
1062
1063 void AtomicOperationCase::checkSupport (Context& ctx) const
1064 {
1065         if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1066         {
1067                 ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");
1068
1069                 const auto atomicInt64Features  = ctx.getShaderAtomicInt64Features();
1070                 const bool isSharedMemory               = (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED);
1071
1072                 if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
1073                 {
1074                         TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
1075                 }
1076                 if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
1077                 {
1078                         TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
1079                 }
1080         }
1081
1082         if (m_dataType == DATA_TYPE_FLOAT16)
1083         {
1084                 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1085                 if (m_atomicOp == ATOMIC_OP_ADD)
1086                 {
1087                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1088                         {
1089                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicAdd)
1090                                 {
1091                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared add atomic operation not supported");
1092                                 }
1093                         }
1094                         else
1095                         {
1096                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicAdd)
1097                                 {
1098                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer add atomic operation not supported");
1099                                 }
1100                         }
1101                 }
1102                 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1103                 {
1104                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1105                         {
1106                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicMinMax)
1107                                 {
1108                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared min/max atomic operation not supported");
1109                                 }
1110                         }
1111                         else
1112                         {
1113                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicMinMax)
1114                                 {
1115                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer min/max atomic operation not supported");
1116                                 }
1117                         }
1118                 }
1119                 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1120                 {
1121                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1122                         {
1123                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16Atomics)
1124                                 {
1125                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared atomic operations not supported");
1126                                 }
1127                         }
1128                         else
1129                         {
1130                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16Atomics)
1131                                 {
1132                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer atomic operations not supported");
1133                                 }
1134                         }
1135                 }
1136         }
1137
1138         if (m_dataType == DATA_TYPE_FLOAT32)
1139         {
1140                 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
1141                 if (m_atomicOp == ATOMIC_OP_ADD)
1142                 {
1143                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1144                         {
1145                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
1146                                 {
1147                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
1148                                 }
1149                         }
1150                         else
1151                         {
1152                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
1153                                 {
1154                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
1155                                 }
1156                         }
1157                 }
1158                 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1159                 {
1160                         ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1161                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1162                         {
1163                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat32AtomicMinMax)
1164                                 {
1165                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared min/max atomic operation not supported");
1166                                 }
1167                         }
1168                         else
1169                         {
1170                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat32AtomicMinMax)
1171                                 {
1172                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer min/max atomic operation not supported");
1173                                 }
1174                         }
1175                 }
1176                 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1177                 {
1178                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1179                         {
1180                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
1181                                 {
1182                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
1183                                 }
1184                         }
1185                         else
1186                         {
1187                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
1188                                 {
1189                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
1190                                 }
1191                         }
1192                 }
1193         }
1194
1195         if (m_dataType == DATA_TYPE_FLOAT64)
1196         {
1197                 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
1198                 if (m_atomicOp == ATOMIC_OP_ADD)
1199                 {
1200                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1201                         {
1202                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
1203                                 {
1204                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
1205                                 }
1206                         }
1207                         else
1208                         {
1209                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
1210                                 {
1211                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
1212                                 }
1213                         }
1214                 }
1215                 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1216                 {
1217                         ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1218                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1219                         {
1220                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat64AtomicMinMax)
1221                                 {
1222                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared min/max atomic operation not supported");
1223                                 }
1224                         }
1225                         else
1226                         {
1227                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat64AtomicMinMax)
1228                                 {
1229                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer min/max atomic operation not supported");
1230                                 }
1231                         }
1232                 }
1233                 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1234                 {
1235                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1236                         {
1237                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
1238                                 {
1239                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
1240                                 }
1241                         }
1242                         else
1243                         {
1244                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
1245                                 {
1246                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
1247                                 }
1248                         }
1249                 }
1250         }
1251
1252         if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
1253         {
1254                 ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
1255         }
1256
1257         // Check stores and atomic operation support.
1258         switch (m_shaderType.getType())
1259         {
1260         case glu::SHADERTYPE_VERTEX:
1261         case glu::SHADERTYPE_TESSELLATION_CONTROL:
1262         case glu::SHADERTYPE_TESSELLATION_EVALUATION:
1263         case glu::SHADERTYPE_GEOMETRY:
1264                 if (!ctx.getDeviceFeatures().vertexPipelineStoresAndAtomics)
1265                         TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in Vertex, Tessellation, and Geometry shader.");
1266                 break;
1267         case glu::SHADERTYPE_FRAGMENT:
1268                 if (!ctx.getDeviceFeatures().fragmentStoresAndAtomics)
1269                         TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in fragment shader.");
1270                 break;
1271         case glu::SHADERTYPE_COMPUTE:
1272                 break;
1273         default:
1274                 DE_FATAL("Unsupported shader type");
1275         }
1276
1277         checkSupportShader(ctx, m_shaderType.getType());
1278 }
1279
1280 void AtomicOperationCase::createShaderSpec (void)
1281 {
1282         const AtomicMemoryType memoryType = m_shaderType.getMemoryType();
1283
1284         // Global declarations.
1285         std::ostringstream shaderTemplateGlobalStream;
1286
1287         // Structure in use for atomic operations.
1288         shaderTemplateGlobalStream
1289                 << "${EXTENSIONS}\n"
1290                 << "\n"
1291                 << "struct AtomicStruct\n"
1292                 << "{\n"
1293                 << "    ${DATATYPE} inoutValues[${N}/2];\n"
1294                 << "    ${DATATYPE} inputValues[${N}];\n"
1295                 << "    ${DATATYPE} compareValues[${N}];\n"
1296                 << "    ${DATATYPE} outputValues[${N}];\n"
1297                 << "    int invocationHitCount[${N}];\n"
1298                 << "    int index;\n"
1299                 << "};\n"
1300                 << "\n"
1301                 ;
1302
1303         // The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
1304         // as "buf.data", which is the name used in the atomic operation statements.
1305         //
1306         // * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
1307         // * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliar "buf" struct.
1308         // * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
1309         //
1310         if (memoryType != AtomicMemoryType::REFERENCE)
1311         {
1312                 shaderTemplateGlobalStream
1313                         << "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
1314                         << "    AtomicStruct data;\n"
1315                         << "} ${RESULT_BUFFER_NAME};\n"
1316                         << "\n"
1317                         ;
1318
1319                 // When using global shared memory in the compute variant, invocations will use a shared global structure instead of a
1320                 // descriptor set as the sources and results of each tested operation.
1321                 if (memoryType == AtomicMemoryType::SHARED)
1322                 {
1323                         shaderTemplateGlobalStream
1324                                 << "shared struct { AtomicStruct data; } buf;\n"
1325                                 << "\n"
1326                                 ;
1327                 }
1328         }
1329         else
1330         {
1331                 shaderTemplateGlobalStream
1332                         << "layout (buffer_reference) buffer AtomicBuffer {\n"
1333                         << "    AtomicStruct data;\n"
1334                         << "};\n"
1335                         << "\n"
1336                         << "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
1337                         << "    AtomicBuffer buf;\n"
1338                         << "};\n"
1339                         << "\n"
1340                         ;
1341         }
1342
1343         const auto                                      shaderTemplateGlobalString      = shaderTemplateGlobalStream.str();
1344         const tcu::StringTemplate       shaderTemplateGlobal            (shaderTemplateGlobalString);
1345
1346         // Shader body for the non-vertex case.
1347         std::ostringstream nonVertexShaderTemplateStream;
1348
1349         if (memoryType == AtomicMemoryType::SHARED)
1350         {
1351                 // Invocation zero will initialize the shared structure from the descriptor set.
1352                 nonVertexShaderTemplateStream
1353                         << "if (gl_LocalInvocationIndex == 0u)\n"
1354                         << "{\n"
1355                         << "    buf.data = ${RESULT_BUFFER_NAME}.data;\n"
1356                         << "}\n"
1357                         << "barrier();\n"
1358                         ;
1359         }
1360
1361         if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
1362         {
1363                 nonVertexShaderTemplateStream
1364                         << "if (!gl_HelperInvocation) {\n"
1365                         << "    int idx = atomicAdd(buf.data.index, 1);\n"
1366                         << "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1367                         << "}\n"
1368                         ;
1369         }
1370         else
1371         {
1372                 nonVertexShaderTemplateStream
1373                         << "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
1374                         << "{\n"
1375                         << "    int idx = atomicAdd(buf.data.index, 1);\n"
1376                         << "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1377                         << "}\n"
1378                         ;
1379         }
1380
1381         if (memoryType == AtomicMemoryType::SHARED)
1382         {
1383                 // Invocation zero will copy results back to the descriptor set.
1384                 nonVertexShaderTemplateStream
1385                         << "barrier();\n"
1386                         << "if (gl_LocalInvocationIndex == 0u)\n"
1387                         << "{\n"
1388                         << "    ${RESULT_BUFFER_NAME}.data = buf.data;\n"
1389                         << "}\n"
1390                         ;
1391         }
1392
1393         const auto                                      nonVertexShaderTemplateStreamStr        = nonVertexShaderTemplateStream.str();
1394         const tcu::StringTemplate       nonVertexShaderTemplateSrc                      (nonVertexShaderTemplateStreamStr);
1395
1396         // Shader body for the vertex case.
1397         const tcu::StringTemplate vertexShaderTemplateSrc(
1398                 "int idx = gl_VertexIndex;\n"
1399                 "if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
1400                 "{\n"
1401                 "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1402                 "}\n");
1403
1404         // Extensions.
1405         std::ostringstream extensions;
1406
1407         if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1408         {
1409                 extensions
1410                         << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
1411                         << "#extension GL_EXT_shader_atomic_int64 : enable\n"
1412                         ;
1413         }
1414         else if ((m_dataType == DATA_TYPE_FLOAT16) || (m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
1415         {
1416                 extensions
1417                         << "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable\n"
1418                         << "#extension GL_EXT_shader_atomic_float : enable\n"
1419                         << "#extension GL_EXT_shader_atomic_float2 : enable\n"
1420                         << "#extension GL_KHR_memory_scope_semantics : enable\n"
1421                         ;
1422         }
1423
1424         if (memoryType == AtomicMemoryType::REFERENCE)
1425         {
1426                 extensions << "#extension GL_EXT_buffer_reference : require\n";
1427         }
1428
1429         // Specializations.
1430         std::map<std::string, std::string> specializations;
1431
1432         specializations["EXTENSIONS"]                   = extensions.str();
1433         specializations["DATATYPE"]                             = dataType2Str(m_dataType);
1434         specializations["ATOMICOP"]                             = atomicOp2Str(m_atomicOp);
1435         specializations["SETIDX"]                               = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
1436         specializations["N"]                                    = de::toString((int)NUM_ELEMENTS);
1437         specializations["COMPARE_ARG"]                  = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
1438         specializations["RESULT_BUFFER_NAME"]   = ((memoryType == AtomicMemoryType::SHARED) ? "result" : "buf");
1439
1440         // Shader spec.
1441         m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1442         m_shaderSpec.glslVersion                = glu::GLSL_VERSION_450;
1443         m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
1444         m_shaderSpec.source                             = ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
1445                                                                                 ? vertexShaderTemplateSrc.specialize(specializations)
1446                                                                                 : nonVertexShaderTemplateSrc.specialize(specializations));
1447
1448         if (memoryType == AtomicMemoryType::SHARED)
1449         {
1450                 // When using global shared memory, use a single workgroup and an appropriate number of local invocations.
1451                 m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
1452         }
1453 }
1454
1455 void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
1456 {
1457         tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();
1458
1459         static const struct
1460         {
1461                 glu::ShaderType         type;
1462                 const char*                     name;
1463         } shaderTypes[] =
1464         {
1465                 { glu::SHADERTYPE_VERTEX,                                                       "vertex"                        },
1466                 { glu::SHADERTYPE_FRAGMENT,                                                     "fragment"                      },
1467                 { glu::SHADERTYPE_GEOMETRY,                                                     "geometry"                      },
1468                 { glu::SHADERTYPE_TESSELLATION_CONTROL,                         "tess_ctrl"                     },
1469                 { glu::SHADERTYPE_TESSELLATION_EVALUATION,                      "tess_eval"                     },
1470                 { glu::SHADERTYPE_COMPUTE,                                                      "compute"                       },
1471         };
1472
1473         static const struct
1474         {
1475                 AtomicMemoryType        type;
1476                 const char*                     suffix;
1477         } kMemoryTypes[] =
1478         {
1479                 { AtomicMemoryType::BUFFER,             ""                              },
1480                 { AtomicMemoryType::SHARED,             "_shared"               },
1481                 { AtomicMemoryType::REFERENCE,  "_reference"    },
1482         };
1483
1484         static const struct
1485         {
1486                 DataType                dataType;
1487                 const char*             name;
1488                 const char*             description;
1489         } dataSign[] =
1490         {
1491                 { DATA_TYPE_FLOAT16,"float16",                  "Tests using 16-bit float data"                         },
1492                 { DATA_TYPE_INT32,      "signed",                       "Tests using signed data (int)"                         },
1493                 { DATA_TYPE_UINT32,     "unsigned",                     "Tests using unsigned data (uint)"                      },
1494                 { DATA_TYPE_FLOAT32,"float32",                  "Tests using 32-bit float data"                         },
1495                 { DATA_TYPE_INT64,      "signed64bit",          "Tests using 64 bit signed data (int64)"        },
1496                 { DATA_TYPE_UINT64,     "unsigned64bit",        "Tests using 64 bit unsigned data (uint64)"     },
1497                 { DATA_TYPE_FLOAT64,"float64",                  "Tests using 64-bit float data)"                        }
1498         };
1499
1500         static const struct
1501         {
1502                 AtomicOperation         value;
1503                 const char*                     name;
1504         } atomicOp[] =
1505         {
1506                 { ATOMIC_OP_EXCHANGE,   "exchange"      },
1507                 { ATOMIC_OP_COMP_SWAP,  "comp_swap"     },
1508                 { ATOMIC_OP_ADD,                "add"           },
1509                 { ATOMIC_OP_MIN,                "min"           },
1510                 { ATOMIC_OP_MAX,                "max"           },
1511                 { ATOMIC_OP_AND,                "and"           },
1512                 { ATOMIC_OP_OR,                 "or"            },
1513                 { ATOMIC_OP_XOR,                "xor"           }
1514         };
1515
1516         for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
1517         {
1518                 for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
1519                 {
1520                         for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
1521                         {
1522                                 // Only ADD and EXCHANGE are supported on floating-point
1523                                 if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT16 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
1524                                 {
1525                                         if (atomicOp[opNdx].value != ATOMIC_OP_ADD &&
1526                                             atomicOp[opNdx].value != ATOMIC_OP_MIN &&
1527                                             atomicOp[opNdx].value != ATOMIC_OP_MAX &&
1528                                             atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
1529                                         {
1530                                                 continue;
1531                                         }
1532                                 }
1533
1534                                 for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
1535                                 {
1536                                         // Shared memory only available in compute shaders.
1537                                         if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE)
1538                                                 continue;
1539
1540                                         const std::string description   = std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
1541                                         const std::string name                  = std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;
1542
1543                                         atomicOperationTestsGroup->addChild(new AtomicOperationCase(testCtx, name.c_str(), description.c_str(), AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type), dataSign[signNdx].dataType, atomicOp[opNdx].value));
1544                                 }
1545                         }
1546                 }
1547         }
1548 }
1549
1550 } // anonymous
1551
1552 tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
1553 {
1554         return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);
1555 }
1556
1557 } // shaderexecutor
1558 } // vkt