Merge pull request #276 from Ella-0/master
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / shaderexecutor / vktAtomicOperationTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2017 Google Inc.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Atomic operations (OpAtomic*) tests.
23  *//*--------------------------------------------------------------------*/
24
25 #include "vktAtomicOperationTests.hpp"
26 #include "vktShaderExecutor.hpp"
27
28 #include "vkRefUtil.hpp"
29 #include "vkMemUtil.hpp"
30 #include "vkQueryUtil.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vktTestGroupUtil.hpp"
35
36 #include "tcuTestLog.hpp"
37 #include "tcuStringTemplate.hpp"
38 #include "tcuResultCollector.hpp"
39
40 #include "deFloat16.h"
41 #include "deMath.hpp"
42 #include "deStringUtil.hpp"
43 #include "deSharedPtr.hpp"
44 #include "deRandom.hpp"
45 #include "deArrayUtil.hpp"
46
47 #include <string>
48 #include <memory>
49 #include <cmath>
50
51 namespace vkt
52 {
53 namespace shaderexecutor
54 {
55
56 namespace
57 {
58
59 using de::UniquePtr;
60 using de::MovePtr;
61 using std::vector;
62
63 using namespace vk;
64
// Kind of memory the shader's atomic operations act on. Drives both shader
// generation and which device features the test requires.
enum class AtomicMemoryType
{
	BUFFER = 0,	// Normal buffer.
	SHARED,		// Shared global struct in a compute workgroup.
	REFERENCE,	// Buffer passed as a reference.
};
71
// Helper struct to indicate the shader type and if it should use shared global memory.
// Immutable value type pairing a shader stage with the memory the atomics target.
class AtomicShaderType
{
public:
	AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
		: m_type				(type)
		, m_atomicMemoryType	(memoryType)
	{
		// Shared global memory can only be set to true with compute shaders.
		DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE);
	}

	glu::ShaderType		getType					(void) const	{ return m_type; }
	AtomicMemoryType	getMemoryType			(void) const	{ return m_atomicMemoryType; }

private:
	glu::ShaderType		m_type;
	AtomicMemoryType	m_atomicMemoryType;
};
91
// Buffer helper: a host-visible Vulkan buffer together with its bound
// allocation. flush()/invalidate() handle host<->device coherency.
class Buffer
{
public:
						// useRef requests a device-address-capable allocation (buffer reference tests).
						Buffer				(Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);

	VkBuffer			getBuffer			(void) const { return *m_buffer;					}
	void*				getHostPtr			(void) const { return m_allocation->getHostPtr();	}
	void				flush				(void);
	void				invalidate			(void);

private:
	const DeviceInterface&		m_vkd;
	const VkDevice				m_device;
	const VkQueue				m_queue;		// Used by invalidate() to submit the barrier.
	const deUint32				m_queueIndex;
	const Unique<VkBuffer>		m_buffer;
	const UniquePtr<Allocation>	m_allocation;
};
111
112 typedef de::SharedPtr<Buffer> BufferSp;
113
114 Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
115 {
116         const VkBufferCreateInfo createInfo     =
117         {
118                 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
119                 DE_NULL,
120                 (VkBufferCreateFlags)0,
121                 size,
122                 usageFlags,
123                 VK_SHARING_MODE_EXCLUSIVE,
124                 0u,
125                 DE_NULL
126         };
127         return createBuffer(vkd, device, &createInfo);
128 }
129
130 MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
131 {
132         const MemoryRequirement allocationType = (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
133         MovePtr<Allocation>     alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));
134
135         VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));
136
137         return alloc;
138 }
139
// Creates the VkBuffer and binds a fresh host-visible allocation to it.
// useRef additionally makes the allocation device-address capable.
Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
	: m_vkd			(context.getDeviceInterface())
	, m_device		(context.getDevice())
	, m_queue		(context.getUniversalQueue())
	, m_queueIndex	(context.getUniversalQueueFamilyIndex())
	, m_buffer		(createBuffer			(context.getDeviceInterface(),
											 context.getDevice(),
											 (VkDeviceSize)size,
											 usage))
	, m_allocation	(allocateAndBindMemory	(context.getDeviceInterface(),
											 context.getDevice(),
											 context.getDefaultAllocator(),
											 *m_buffer,
											 useRef))
{
}
156
// Flushes the whole mapped range so host writes become visible to the device.
void Buffer::flush (void)
{
	flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}
161
// Makes device writes to this buffer visible to the host: records a one-shot
// command buffer with a device-to-host buffer memory barrier, submits it and
// waits, then invalidates the mapped memory range for non-coherent memory.
void Buffer::invalidate (void)
{
	const auto	cmdPool			= vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
	const auto	cmdBufferPtr	= vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
	const auto	cmdBuffer		= cmdBufferPtr.get();
	// All prior device writes -> host reads, over the entire buffer.
	const auto	bufferBarrier	= vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);

	beginCommandBuffer(m_vkd, cmdBuffer);
	m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
	endCommandBuffer(m_vkd, cmdBuffer);
	submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);

	invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}
176
177 // Tests
178
// Atomic operations under test. Order must match the name table in
// atomicOp2Str().
enum AtomicOperation
{
	ATOMIC_OP_EXCHANGE = 0,
	ATOMIC_OP_COMP_SWAP,
	ATOMIC_OP_ADD,
	ATOMIC_OP_MIN,
	ATOMIC_OP_MAX,
	ATOMIC_OP_AND,
	ATOMIC_OP_OR,
	ATOMIC_OP_XOR,

	ATOMIC_OP_LAST
};
192
// Maps an AtomicOperation to its GLSL function name.
std::string atomicOp2Str (AtomicOperation op)
{
	// Keep in sync with the AtomicOperation enum ordering; the lookup asserts
	// the array size matches ATOMIC_OP_LAST.
	static const char* const s_names[] =
	{
		"atomicExchange",
		"atomicCompSwap",
		"atomicAdd",
		"atomicMin",
		"atomicMax",
		"atomicAnd",
		"atomicOr",
		"atomicXor"
	};
	return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
}
208
enum
{
	// Number of atomic invocations per test; inout[] uses half this count so
	// each element is hit by two overlapping operations.
	NUM_ELEMENTS = 32
};
213
// Scalar types the atomics are tested on. Order must match the name table in
// dataType2Str().
enum DataType
{
	DATA_TYPE_FLOAT16 = 0,
	DATA_TYPE_INT32,
	DATA_TYPE_UINT32,
	DATA_TYPE_FLOAT32,
	DATA_TYPE_INT64,
	DATA_TYPE_UINT64,
	DATA_TYPE_FLOAT64,

	DATA_TYPE_LAST
};
226
// Maps a DataType to its GLSL type name.
std::string dataType2Str(DataType type)
{
	// Keep in sync with the DataType enum ordering.
	static const char* const s_names[] =
	{
		"float16_t",
		"int",
		"uint",
		"float",
		"int64_t",
		"uint64_t",
		"double",
	};
	return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
}
241
// Type-erased view of a test's storage buffer: fills it with inputs, sizes
// it, and verifies the results, independent of the concrete element type.
class BufferInterface
{
public:
	// Points the implementation at the host-mapped buffer memory (not owned).
	virtual void setBuffer(void* ptr) = 0;

	// Size in bytes of the buffer contents (shader block layout).
	virtual size_t bufferSize() = 0;

	// Writes randomized inputs (plus any special cases) into the buffer.
	virtual void fillWithTestData(de::Random &rnd) = 0;

	// Compares buffer contents after execution against expected outcomes.
	virtual void checkResults(tcu::ResultCollector& resultCollector) = 0;

	virtual ~BufferInterface() {};
};
255
// Integer test buffer: holds the host view of the SSBO and verifies integer
// atomic results with exact bitwise comparison.
template<typename dataTypeT>
class TestBuffer : public BufferInterface
{
public:

	TestBuffer(AtomicOperation	atomicOp)
		: m_atomicOp(atomicOp)
	{}

	// Memory layout of the storage buffer as seen by the shader; must match
	// the block layout declared in the generated shader source.
	template<typename T>
	struct BufferData
	{
		// Use half the number of elements for inout to cause overlap between atomic operations.
		// Each inout element at index i will have two atomic operations using input from
		// indices i and i + NUM_ELEMENTS / 2.
		T			inout[NUM_ELEMENTS / 2];
		T			input[NUM_ELEMENTS];
		T			compare[NUM_ELEMENTS];	// Comparison values; only meaningful for atomicCompSwap.
		T			output[NUM_ELEMENTS];	// Pre-operation memory contents returned by each atomic call.
		T			invocationHitCount[NUM_ELEMENTS];
		deInt32		index;
	};

	virtual void setBuffer(void* ptr)
	{
		m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
	}

	virtual size_t bufferSize()
	{
		return sizeof(BufferData<dataTypeT>);
	}

	virtual void fillWithTestData(de::Random &rnd)
	{
		// Recognizable garbage pattern so untouched output slots are obvious.
		dataTypeT pattern;
		deMemset(&pattern, 0xcd, sizeof(dataTypeT));

		for (int i = 0; i < NUM_ELEMENTS / 2; i++)
		{
			m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
			// The first half of compare elements match with every even index.
			// The second half matches with odd indices. This causes the
			// overlapping operations to only select one.
			m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
			m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
		}
		for (int i = 0; i < NUM_ELEMENTS; i++)
		{
			m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
			m_ptr->output[i] = pattern;
			m_ptr->invocationHitCount[i] = 0;
		}
		m_ptr->index = 0;

		// Take a copy to be used when calculating expected values.
		m_original = *m_ptr;
	}

	virtual void checkResults(tcu::ResultCollector& resultCollector)
	{
		checkOperation(m_original, *m_ptr, resultCollector);
	}

	// One allowed outcome: the final inout value plus the two per-operation
	// return values for one possible execution order.
	template<typename T>
	struct Expected
	{
		T m_inout;
		T m_output[2];

		Expected (T inout, T output0, T output1)
		: m_inout(inout)
		{
			m_output[0] = output0;
			m_output[1] = output1;
		}

		// Bitwise comparison (deMemCmp) so signed/unsigned reinterpretation
		// of the same bits compares consistently.
		bool compare (T inout, T output0, T output1)
		{
			return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
					&& deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
					&& deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
		}
	};

	// Defined below; verifies 'result' against both execution orders.
	void checkOperation	(const BufferData<dataTypeT>&	original,
						 const BufferData<dataTypeT>&	result,
						 tcu::ResultCollector&			resultCollector);

	const AtomicOperation	m_atomicOp;

	BufferData<dataTypeT>* m_ptr;		// Host-mapped buffer contents (not owned).
	BufferData<dataTypeT>  m_original;	// Snapshot taken after filling, used for expectations.

};
351
// Tolerant equality: any two NaNs (quiet or signaling) compare equal, a NaN
// never equals a number, and numbers compare with a loose epsilon in double
// precision.
template<typename T>
bool nanSafeSloppyEquals(T x, T y)
{
	const bool xIsNaN = deIsIEEENaN(x);
	const bool yIsNaN = deIsIEEENaN(y);

	if (xIsNaN || yIsNaN)
		return xIsNaN && yIsNaN;

	return fabs(deToDouble(x) - deToDouble(y)) < 0.00001;
}
363
364 template<typename dataTypeT>
365 class TestBufferFloatingPoint : public BufferInterface
366 {
367 public:
368
369         TestBufferFloatingPoint(AtomicOperation atomicOp)
370                 : m_atomicOp(atomicOp)
371         {}
372
373         template<typename T>
374         struct BufferDataFloatingPoint
375         {
376                 // Use half the number of elements for inout to cause overlap between atomic operations.
377                 // Each inout element at index i will have two atomic operations using input from
378                 // indices i and i + NUM_ELEMENTS / 2.
379                 T                       inout[NUM_ELEMENTS / 2];
380                 T                       input[NUM_ELEMENTS];
381                 T                       compare[NUM_ELEMENTS];
382                 T                       output[NUM_ELEMENTS];
383                 deInt32         invocationHitCount[NUM_ELEMENTS];
384                 deInt32         index;
385         };
386
387         virtual void setBuffer(void* ptr)
388         {
389                 m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
390         }
391
392         virtual size_t bufferSize()
393         {
394                 return sizeof(BufferDataFloatingPoint<dataTypeT>);
395         }
396
397         virtual void fillWithTestData(de::Random& rnd)
398         {
399                 dataTypeT pattern;
400                 deMemset(&pattern, 0xcd, sizeof(dataTypeT));
401
402                 for (int i = 0; i < NUM_ELEMENTS / 2; i++)
403                 {
404                         m_ptr->inout[i] = deToFloatType<dataTypeT>(rnd.getFloat());
405                         // These aren't used by any of the float tests
406                         m_ptr->compare[i] = deToFloatType<dataTypeT>(0.0);
407                 }
408                 // Add special cases for NaN and +/-0
409                 // 0: min(sNaN, x)
410                 m_ptr->inout[0] = deSignalingNaN<dataTypeT>();
411                 // 1: min(x, sNaN)
412                 m_ptr->input[1 * 2 + 0] = deSignalingNaN<dataTypeT>();
413                 // 2: min(qNaN, x)
414                 m_ptr->inout[2] = deQuietNaN<dataTypeT>();
415                 // 3: min(x, qNaN)
416                 m_ptr->input[3 * 2 + 0] = deQuietNaN<dataTypeT>();
417                 // 4: min(NaN, NaN)
418                 m_ptr->inout[4] = deSignalingNaN<dataTypeT>();
419                 m_ptr->input[4 * 2 + 0] = deQuietNaN<dataTypeT>();
420                 m_ptr->input[4 * 2 + 1] = deQuietNaN<dataTypeT>();
421                 // 5: min(+0, -0)
422                 m_ptr->inout[5] = deToFloatType<dataTypeT>(-0.0);
423                 m_ptr->input[5 * 2 + 0] = deToFloatType<dataTypeT>(0.0);
424                 m_ptr->input[5 * 2 + 1] = deToFloatType<dataTypeT>(0.0);
425
426                 for (int i = 0; i < NUM_ELEMENTS; i++)
427                 {
428                         m_ptr->input[i] = deToFloatType<dataTypeT>(rnd.getFloat());
429                         m_ptr->output[i] = pattern;
430                         m_ptr->invocationHitCount[i] = 0;
431                 }
432
433                 m_ptr->index = 0;
434
435                 // Take a copy to be used when calculating expected values.
436                 m_original = *m_ptr;
437         }
438
439         virtual void checkResults(tcu::ResultCollector& resultCollector)
440         {
441                 checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
442         }
443
444         template<typename T>
445         struct Expected
446         {
447                 T m_inout;
448                 T m_output[2];
449
450                 Expected(T inout, T output0, T output1)
451                         : m_inout(inout)
452                 {
453                         m_output[0] = output0;
454                         m_output[1] = output1;
455                 }
456
457                 bool compare(T inout, T output0, T output1)
458                 {
459                         return nanSafeSloppyEquals(m_inout, inout) &&
460                                nanSafeSloppyEquals(m_output[0], output0) &&
461                                nanSafeSloppyEquals(m_output[1], output1);
462                 }
463         };
464
465         void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT>& original,
466                 const BufferDataFloatingPoint<dataTypeT>& result,
467                 tcu::ResultCollector& resultCollector);
468
469         const AtomicOperation   m_atomicOp;
470
471         BufferDataFloatingPoint<dataTypeT>* m_ptr;
472         BufferDataFloatingPoint<dataTypeT>  m_original;
473
474 };
475
476 static BufferInterface* createTestBuffer(DataType type, AtomicOperation atomicOp)
477 {
478         switch (type)
479         {
480         case DATA_TYPE_FLOAT16:
481                 return new TestBufferFloatingPoint<deFloat16>(atomicOp);
482         case DATA_TYPE_INT32:
483                 return new TestBuffer<deInt32>(atomicOp);
484         case DATA_TYPE_UINT32:
485                 return new TestBuffer<deUint32>(atomicOp);
486         case DATA_TYPE_FLOAT32:
487                 return new TestBufferFloatingPoint<float>(atomicOp);
488         case DATA_TYPE_INT64:
489                 return new TestBuffer<deInt64>(atomicOp);
490         case DATA_TYPE_UINT64:
491                 return new TestBuffer<deUint64>(atomicOp);
492         case DATA_TYPE_FLOAT64:
493                 return new TestBufferFloatingPoint<double>(atomicOp);
494         default:
495                 DE_ASSERT(false);
496                 return DE_NULL;
497         }
498 }
499
// Use template to handle both signed and unsigned cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBuffer<T>::checkOperation (const BufferData<T>&	original,
									const BufferData<T>&	result,
									tcu::ResultCollector&	resultCollector)
{
	// originalInout = original inout
	// input0 = input at index i
	// input1 = input at index i + NUM_ELEMENTS / 2
	//
	// atomic operation will return the memory contents before
	// the operation and this is stored as output. Two operations
	// are executed for each InOut value (using input0 and input1).
	//
	// Since there is an overlap of two operations per each
	// InOut element, the outcome of the resulting InOut and
	// the outputs of the operations have two result candidates
	// depending on the execution order. Verification passes
	// if the results match one of these options.

	for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
	{
		// Needed when reinterpeting the data as signed values.
		const T originalInout	= *reinterpret_cast<const T*>(&original.inout[elementNdx]);
		const T input0			= *reinterpret_cast<const T*>(&original.input[elementNdx]);
		const T input1			= *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

		// Expected results are collected to this vector.
		// Each case pushes exactly two candidates: one per execution order
		// (input0 applied first, or input1 applied first).
		vector<Expected<T> > exp;

		switch (m_atomicOp)
		{
			case ATOMIC_OP_ADD:
			{
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
			}
			break;

			case ATOMIC_OP_AND:
			{
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
			}
			break;

			case ATOMIC_OP_OR:
			{
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
			}
			break;

			case ATOMIC_OP_XOR:
			{
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
			}
			break;

			case ATOMIC_OP_MIN:
			{
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_MAX:
			{
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_EXCHANGE:
			{
				exp.push_back(Expected<T>(input1, originalInout, input0));
				exp.push_back(Expected<T>(input0, input1, originalInout));
			}
			break;

			case ATOMIC_OP_COMP_SWAP:
			{
				// compare[] was filled so that exactly one of the two
				// overlapping swaps succeeds (even indices: input0 side,
				// odd indices: input1 side); see fillWithTestData().
				if (elementNdx % 2 == 0)
				{
					exp.push_back(Expected<T>(input0, originalInout, input0));
					exp.push_back(Expected<T>(input0, originalInout, originalInout));
				}
				else
				{
					exp.push_back(Expected<T>(input1, input1, originalInout));
					exp.push_back(Expected<T>(input1, originalInout, originalInout));
				}
			}
			break;


			default:
				DE_FATAL("Unexpected atomic operation.");
				break;
		};

		const T resIo		= result.inout[elementNdx];
		const T resOutput0	= result.output[elementNdx];
		const T resOutput1	= result.output[elementNdx + NUM_ELEMENTS / 2];


		if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
		{
			std::ostringstream errorMessage;
			errorMessage	<< "ERROR: Result value check failed at index " << elementNdx
							<< ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
							<< ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
							<< tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
							<< " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";

			resultCollector.fail(errorMessage.str());
		}
	}
}
625
626 template<typename T>
627 void handleExceptionalFloatMinMaxValues(vector<T> &values, T x, T y)
628 {
629
630         if (deIsSignalingNaN(x) && deIsSignalingNaN(y))
631         {
632                 values.push_back(deQuietNaN<T>());
633                 values.push_back(deSignalingNaN<T>());
634         }
635         else if (deIsSignalingNaN(x))
636         {
637                 values.push_back(deQuietNaN<T>());
638                 values.push_back(deSignalingNaN<T>());
639                 if (!deIsIEEENaN(y))
640                         values.push_back(y);
641         }
642         else if (deIsSignalingNaN(y))
643         {
644                 values.push_back(deQuietNaN<T>());
645                 values.push_back(deSignalingNaN<T>());
646                 if (!deIsIEEENaN(x))
647                         values.push_back(x);
648         }
649         else if (deIsIEEENaN(x) && deIsIEEENaN(y))
650         {
651                 // Both quiet NaNs
652                 values.push_back(deQuietNaN<T>());
653         }
654         else if (deIsIEEENaN(x))
655         {
656                 // One quiet NaN and one non-NaN.
657                 values.push_back(y);
658         }
659         else if (deIsIEEENaN(y))
660         {
661                 // One quiet NaN and one non-NaN.
662                 values.push_back(x);
663         }
664         else if ((deIsPositiveZero(x) && deIsNegativeZero(y)) || (deIsNegativeZero(x) && deIsPositiveZero(y)))
665         {
666                 values.push_back(deToFloatType<T>(0.0));
667                 values.push_back(deToFloatType<T>(-0.0));
668         }
669 }
670
671 template<typename T>
672 T floatAdd(T x, T y)
673 {
674         if (deIsIEEENaN(x) || deIsIEEENaN(y))
675                 return deQuietNaN<T>();
676         return deToFloatType<T>(deToDouble(x) + deToDouble(y));
677 }
678
679 template<typename T>
680 vector<T> floatMinValues(T x, T y)
681 {
682         vector<T> values;
683         handleExceptionalFloatMinMaxValues(values, x, y);
684         if (values.empty())
685         {
686                 values.push_back(deToDouble(x) < deToDouble(y) ? x : y);
687         }
688         return values;
689 }
690
691 template<typename T>
692 vector<T> floatMaxValues(T x, T y)
693 {
694         vector<T> values;
695         handleExceptionalFloatMinMaxValues(values, x, y);
696         if (values.empty())
697         {
698                 values.push_back(deToDouble(x) > deToDouble(y) ? x : y);
699         }
700         return values;
701 }
702
703 // Use template to handle both float and double cases. SPIR-V should
704 // have separate operations for both.
705 template<typename T>
706 void TestBufferFloatingPoint<T>::checkOperationFloatingPoint(const BufferDataFloatingPoint<T>& original,
707         const BufferDataFloatingPoint<T>& result,
708         tcu::ResultCollector& resultCollector)
709 {
710         // originalInout = original inout
711         // input0 = input at index i
712         // iinput1 = input at index i + NUM_ELEMENTS / 2
713         //
714         // atomic operation will return the memory contents before
715         // the operation and this is stored as output. Two operations
716         // are executed for each InOut value (using input0 and input1).
717         //
718         // Since there is an overlap of two operations per each
719         // InOut element, the outcome of the resulting InOut and
720         // the outputs of the operations have two result candidates
721         // depending on the execution order. Verification passes
722         // if the results match one of these options.
723
724         for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
725         {
726                 // Needed when reinterpeting the data as signed values.
727                 const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
728                 const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
729                 const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);
730
731                 // Expected results are collected to this vector.
732                 vector<Expected<T> > exp;
733
734                 switch (m_atomicOp)
735                 {
736                 case ATOMIC_OP_ADD:
737                 {
738                         exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), originalInout, floatAdd(originalInout, input0)));
739                         exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), floatAdd(originalInout, input1), originalInout));
740                 }
741                 break;
742
743                 case ATOMIC_OP_MIN:
744                 {
745                         // The case where input0 is combined first
746                         vector<T> minOriginalAndInput0 = floatMinValues(originalInout, input0);
747                         for (T x : minOriginalAndInput0)
748                         {
749                                 vector<T> minAll = floatMinValues(x, input1);
750                                 for (T y : minAll)
751                                 {
752                                         exp.push_back(Expected<T>(y, originalInout, x));
753                                 }
754                         }
755
756                         // The case where input1 is combined first
757                         vector<T> minOriginalAndInput1 = floatMinValues(originalInout, input1);
758                         for (T x : minOriginalAndInput1)
759                         {
760                                 vector<T> minAll = floatMinValues(x, input0);
761                                 for (T y : minAll)
762                                 {
763                                         exp.push_back(Expected<T>(y, x, originalInout));
764                                 }
765                         }
766                 }
767                 break;
768
769                 case ATOMIC_OP_MAX:
770                 {
771                         // The case where input0 is combined first
772                         vector<T> minOriginalAndInput0 = floatMaxValues(originalInout, input0);
773                         for (T x : minOriginalAndInput0)
774                         {
775                                 vector<T> minAll = floatMaxValues(x, input1);
776                                 for (T y : minAll)
777                                 {
778                                         exp.push_back(Expected<T>(y, originalInout, x));
779                                 }
780                         }
781
782                         // The case where input1 is combined first
783                         vector<T> minOriginalAndInput1 = floatMaxValues(originalInout, input1);
784                         for (T x : minOriginalAndInput1)
785                         {
786                                 vector<T> minAll = floatMaxValues(x, input0);
787                                 for (T y : minAll)
788                                 {
789                                         exp.push_back(Expected<T>(y, x, originalInout));
790                                 }
791                         }
792                 }
793                 break;
794
795                 case ATOMIC_OP_EXCHANGE:
796                 {
797                         exp.push_back(Expected<T>(input1, originalInout, input0));
798                         exp.push_back(Expected<T>(input0, input1, originalInout));
799                 }
800                 break;
801
802                 default:
803                         DE_FATAL("Unexpected atomic operation.");
804                         break;
805                 };
806
807                 const T resIo = result.inout[elementNdx];
808                 const T resOutput0 = result.output[elementNdx];
809                 const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];
810
811
812                 bool hasMatch = false;
813                 for (Expected<T> e : exp)
814                 {
815                         if (e.compare(resIo, resOutput0, resOutput1))
816                         {
817                                 hasMatch = true;
818                                 break;
819                         }
820                 }
821                 if (!hasMatch)
822                 {
823                         std::ostringstream errorMessage;
824                         errorMessage << "ERROR: Result value check failed at index " << elementNdx
825                                 << ". Expected one of the outcomes:";
826
827                         bool first = true;
828                         for (Expected<T> e : exp)
829                         {
830                                 if (!first)
831                                         errorMessage << ", or";
832                                 first = false;
833
834                                 errorMessage << " InOut = " << e.m_inout
835                                         << ", Output0 = " << e.m_output[0]
836                                         << ", Output1 = " << e.m_output[1];
837                         }
838
839                         errorMessage << ". Got: InOut = " << resIo
840                                 << ", Output0 = " << resOutput0
841                                 << ", Output1 = " << resOutput1
842                                 << ". Using Input0 = " << original.input[elementNdx]
843                                 << " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";
844
845                         resultCollector.fail(errorMessage.str());
846                 }
847         }
848 }
849
// Test instance running one atomic-operation case through the shader executor.
// The ShaderSpec is held by reference and must outlive this instance (it is
// owned by the corresponding AtomicOperationCase — see createInstance()).
class AtomicOperationCaseInstance : public TestInstance
{
public:
									AtomicOperationCaseInstance		(Context&			context,
																	 const ShaderSpec&	shaderSpec,
																	 AtomicShaderType	shaderType,
																	 DataType			dataType,
																	 AtomicOperation	atomicOp);

	// Runs the test once: sets up buffers/descriptors, executes and verifies.
	virtual tcu::TestStatus			iterate							(void);

private:
	const ShaderSpec&				m_shaderSpec;	// Borrowed reference; not owned.
	AtomicShaderType				m_shaderType;	// Shader stage + memory type (buffer/shared/reference).
	const DataType					m_dataType;		// Data type operated on by the atomic op.
	AtomicOperation					m_atomicOp;		// The atomic operation under test.

};
868
869 AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context&                              context,
870                                                                                                                   const ShaderSpec&             shaderSpec,
871                                                                                                                   AtomicShaderType              shaderType,
872                                                                                                                   DataType                              dataType,
873                                                                                                                   AtomicOperation               atomicOp)
874         : TestInstance  (context)
875         , m_shaderSpec  (shaderSpec)
876         , m_shaderType  (shaderType)
877         , m_dataType    (dataType)
878         , m_atomicOp    (atomicOp)
879 {
880 }
881
// Runs one atomic operation test:
//  1) Fill the main buffer with randomized test data for the chosen type/op.
//  2) Expose the buffer to the shader either directly as a storage-buffer
//     descriptor, or — for AtomicMemoryType::REFERENCE — indirectly, by writing
//     its device address into a small uniform buffer.
//  3) Execute the shader via the ShaderExecutor and verify the buffer contents
//     against all legal interleavings of the atomic operations.
tcu::TestStatus AtomicOperationCaseInstance::iterate(void)
{
	de::UniquePtr<BufferInterface>	testBuffer	(createTestBuffer(m_dataType, m_atomicOp));
	tcu::TestLog&					log			= m_context.getTestContext().getLog();
	const DeviceInterface&			vkd			= m_context.getDeviceInterface();
	const VkDevice					device		= m_context.getDevice();
	de::Random						rnd			(0x62a15e34);	// Fixed seed keeps the test data reproducible.
	const bool						useRef		= (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
	const VkDescriptorType			descType	= (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	const VkBufferUsageFlags		usageFlags	= (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));

	// The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
	// a uniform buffer. If not, it will be passed directly as a descriptor.
	Buffer							buffer		(m_context, usageFlags, testBuffer->bufferSize(), useRef);
	std::unique_ptr<Buffer>			auxBuffer;

	if (useRef)
	{
		// Pass the main buffer address inside a uniform buffer.
		const VkBufferDeviceAddressInfo addressInfo =
		{
			VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	//	VkStructureType	sType;
			nullptr,										//	const void*		pNext;
			buffer.getBuffer(),								//	VkBuffer		buffer;
		};
		const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);

		auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
		deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
		auxBuffer->flush();
	}

	// Generate the input data in host memory and make it visible to the device.
	testBuffer->setBuffer(buffer.getHostPtr());
	testBuffer->fillWithTestData(rnd);

	buffer.flush();

	Move<VkDescriptorSetLayout>	extraResourcesLayout;
	Move<VkDescriptorPool>		extraResourcesSetPool;
	Move<VkDescriptorSet>		extraResourcesSet;

	// Single binding at index 0: the storage buffer itself, or the uniform
	// buffer holding its device address when buffer references are used.
	const VkDescriptorSetLayoutBinding bindings[] =
	{
		{ 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
	};

	const VkDescriptorSetLayoutCreateInfo	layoutInfo	=
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		DE_NULL,
		(VkDescriptorSetLayoutCreateFlags)0u,
		DE_LENGTH_OF_ARRAY(bindings),
		bindings
	};

	extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);

	const VkDescriptorPoolSize poolSizes[] =
	{
		{ descType, 1u }
	};

	const VkDescriptorPoolCreateInfo poolInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
		DE_NULL,
		(VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
		1u,		// maxSets
		DE_LENGTH_OF_ARRAY(poolSizes),
		poolSizes
	};

	extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*extraResourcesSetPool,
		1u,
		&extraResourcesLayout.get()
	};

	extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);

	// Point the descriptor at whichever buffer the shader will actually bind.
	VkDescriptorBufferInfo bufferInfo;
	bufferInfo.buffer	= (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
	bufferInfo.offset	= 0u;
	bufferInfo.range	= VK_WHOLE_SIZE;

	const VkWriteDescriptorSet descriptorWrite =
	{
		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		DE_NULL,
		*extraResourcesSet,
		0u,		// dstBinding
		0u,		// dstArrayElement
		1u,
		descType,
		(const VkDescriptorImageInfo*)DE_NULL,
		&bufferInfo,
		(const VkBufferView*)DE_NULL
	};

	vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);

	// Storage for output varying data.
	std::vector<deUint32>	outputs		(NUM_ELEMENTS);
	std::vector<void*>		outputPtr	(NUM_ELEMENTS);

	// Pre-fill the outputs with a recognizable pattern so unwritten entries
	// are easy to spot when debugging.
	for (size_t i = 0; i < NUM_ELEMENTS; i++)
	{
		outputs[i] = 0xcdcdcdcd;
		outputPtr[i] = &outputs[i];
	}

	// Shared-memory variants run everything inside a single workgroup; other
	// variants use one invocation (work group) per element.
	const int					numWorkGroups	= ((m_shaderType.getMemoryType() == AtomicMemoryType::SHARED) ? 1 : static_cast<int>(NUM_ELEMENTS));
	UniquePtr<ShaderExecutor>	executor		(createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));

	executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
	buffer.invalidate();	// Make device writes visible to the host before checking.

	tcu::ResultCollector resultCollector(log);

	// Check the results of the atomic operation
	testBuffer->checkResults(resultCollector);

	return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
}
1011
// Test case for a single (shader type, data type, atomic operation)
// combination. Builds the shader spec up front (see createShaderSpec) and
// creates an AtomicOperationCaseInstance to execute it.
class AtomicOperationCase : public TestCase
{
public:
							AtomicOperationCase		(tcu::TestContext&		testCtx,
													 const char*			name,
													 const char*			description,
													 AtomicShaderType		type,
													 DataType				dataType,
													 AtomicOperation		atomicOp);
	virtual					~AtomicOperationCase	(void);

	virtual TestInstance*	createInstance			(Context& ctx) const;
	// Throws NotSupportedError unless the device supports the required
	// extensions/features for this data type, operation and shader stage.
	virtual void			checkSupport			(Context& ctx) const;
	virtual void			initPrograms			(vk::SourceCollections& programCollection) const
	{
		generateSources(m_shaderType.getType(), m_shaderSpec, programCollection);
	}

private:

	// Fills m_shaderSpec; called from the constructor before init().
	void					createShaderSpec();
	ShaderSpec				m_shaderSpec;
	const AtomicShaderType	m_shaderType;
	const DataType			m_dataType;
	const AtomicOperation	m_atomicOp;
};
1038
1039 AtomicOperationCase::AtomicOperationCase (tcu::TestContext&     testCtx,
1040                                                                                   const char*           name,
1041                                                                                   const char*           description,
1042                                                                                   AtomicShaderType      shaderType,
1043                                                                                   DataType                      dataType,
1044                                                                                   AtomicOperation       atomicOp)
1045         : TestCase                      (testCtx, name, description)
1046         , m_shaderType          (shaderType)
1047         , m_dataType            (dataType)
1048         , m_atomicOp            (atomicOp)
1049 {
1050         createShaderSpec();
1051         init();
1052 }
1053
// Nothing to release explicitly; all members clean up via their destructors.
AtomicOperationCase::~AtomicOperationCase (void)
{
}
1057
1058 TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
1059 {
1060         return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
1061 }
1062
1063 void AtomicOperationCase::checkSupport (Context& ctx) const
1064 {
1065         if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1066         {
1067                 ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");
1068
1069                 const auto atomicInt64Features  = ctx.getShaderAtomicInt64Features();
1070                 const bool isSharedMemory               = (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED);
1071
1072                 if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
1073                 {
1074                         TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
1075                 }
1076                 if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
1077                 {
1078                         TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
1079                 }
1080         }
1081
1082         if (m_dataType == DATA_TYPE_FLOAT16)
1083         {
1084                 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1085                 if (m_atomicOp == ATOMIC_OP_ADD)
1086                 {
1087                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1088                         {
1089                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicAdd)
1090                                 {
1091                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared add atomic operation not supported");
1092                                 }
1093                         }
1094                         else
1095                         {
1096                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicAdd)
1097                                 {
1098                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer add atomic operation not supported");
1099                                 }
1100                         }
1101                 }
1102                 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1103                 {
1104                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1105                         {
1106                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicMinMax)
1107                                 {
1108                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared min/max atomic operation not supported");
1109                                 }
1110                         }
1111                         else
1112                         {
1113                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicMinMax)
1114                                 {
1115                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer min/max atomic operation not supported");
1116                                 }
1117                         }
1118                 }
1119                 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1120                 {
1121                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1122                         {
1123                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16Atomics)
1124                                 {
1125                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared atomic operations not supported");
1126                                 }
1127                         }
1128                         else
1129                         {
1130                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16Atomics)
1131                                 {
1132                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer atomic operations not supported");
1133                                 }
1134                         }
1135                 }
1136         }
1137
1138         if (m_dataType == DATA_TYPE_FLOAT32)
1139         {
1140                 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
1141                 if (m_atomicOp == ATOMIC_OP_ADD)
1142                 {
1143                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1144                         {
1145                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
1146                                 {
1147                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
1148                                 }
1149                         }
1150                         else
1151                         {
1152                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
1153                                 {
1154                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
1155                                 }
1156                         }
1157                 }
1158                 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1159                 {
1160                         ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1161                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1162                         {
1163                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat32AtomicMinMax)
1164                                 {
1165                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared min/max atomic operation not supported");
1166                                 }
1167                         }
1168                         else
1169                         {
1170                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat32AtomicMinMax)
1171                                 {
1172                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer min/max atomic operation not supported");
1173                                 }
1174                         }
1175                 }
1176                 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1177                 {
1178                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1179                         {
1180                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
1181                                 {
1182                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
1183                                 }
1184                         }
1185                         else
1186                         {
1187                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
1188                                 {
1189                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
1190                                 }
1191                         }
1192                 }
1193         }
1194
1195         if (m_dataType == DATA_TYPE_FLOAT64)
1196         {
1197                 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
1198                 if (m_atomicOp == ATOMIC_OP_ADD)
1199                 {
1200                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1201                         {
1202                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
1203                                 {
1204                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
1205                                 }
1206                         }
1207                         else
1208                         {
1209                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
1210                                 {
1211                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
1212                                 }
1213                         }
1214                 }
1215                 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1216                 {
1217                         ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1218                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1219                         {
1220                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat64AtomicMinMax)
1221                                 {
1222                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared min/max atomic operation not supported");
1223                                 }
1224                         }
1225                         else
1226                         {
1227                                 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat64AtomicMinMax)
1228                                 {
1229                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer min/max atomic operation not supported");
1230                                 }
1231                         }
1232                 }
1233                 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1234                 {
1235                         if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1236                         {
1237                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
1238                                 {
1239                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
1240                                 }
1241                         }
1242                         else
1243                         {
1244                                 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
1245                                 {
1246                                         TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
1247                                 }
1248                         }
1249                 }
1250         }
1251
1252         if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
1253         {
1254                 ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
1255         }
1256
1257         // Check stores and atomic operation support.
1258         switch (m_shaderType.getType())
1259         {
1260         case glu::SHADERTYPE_VERTEX:
1261         case glu::SHADERTYPE_TESSELLATION_CONTROL:
1262         case glu::SHADERTYPE_TESSELLATION_EVALUATION:
1263         case glu::SHADERTYPE_GEOMETRY:
1264                 if (!ctx.getDeviceFeatures().vertexPipelineStoresAndAtomics)
1265                         TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in Vertex, Tessellation, and Geometry shader.");
1266                 break;
1267         case glu::SHADERTYPE_FRAGMENT:
1268                 if (!ctx.getDeviceFeatures().fragmentStoresAndAtomics)
1269                         TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in fragment shader.");
1270                 break;
1271         case glu::SHADERTYPE_COMPUTE:
1272                 break;
1273         default:
1274                 DE_FATAL("Unsupported shader type");
1275         }
1276
1277         checkSupportShader(ctx, m_shaderType.getType());
1278 }
1279
1280 void AtomicOperationCase::createShaderSpec (void)
1281 {
1282         const AtomicMemoryType memoryType = m_shaderType.getMemoryType();
1283
1284         // Global declarations.
1285         std::ostringstream shaderTemplateGlobalStream;
1286
1287         // Structure in use for atomic operations.
1288         shaderTemplateGlobalStream
1289                 << "${EXTENSIONS}\n"
1290                 << "\n"
1291                 << "struct AtomicStruct\n"
1292                 << "{\n"
1293                 << "    ${DATATYPE} inoutValues[${N}/2];\n"
1294                 << "    ${DATATYPE} inputValues[${N}];\n"
1295                 << "    ${DATATYPE} compareValues[${N}];\n"
1296                 << "    ${DATATYPE} outputValues[${N}];\n"
1297                 << "    int invocationHitCount[${N}];\n"
1298                 << "    int index;\n"
1299                 << "};\n"
1300                 << "\n"
1301                 ;
1302
1303         // The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
1304         // as "buf.data", which is the name used in the atomic operation statements.
1305         //
1306         // * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
1307         // * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliar "buf" struct.
1308         // * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
1309         //
1310         if (memoryType != AtomicMemoryType::REFERENCE)
1311         {
1312                 shaderTemplateGlobalStream
1313                         << "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
1314                         << "    AtomicStruct data;\n"
1315                         << "} ${RESULT_BUFFER_NAME};\n"
1316                         << "\n"
1317                         ;
1318
1319                 // When using global shared memory in the compute variant, invocations will use a shared global structure instead of a
1320                 // descriptor set as the sources and results of each tested operation.
1321                 if (memoryType == AtomicMemoryType::SHARED)
1322                 {
1323                         shaderTemplateGlobalStream
1324                                 << "shared struct { AtomicStruct data; } buf;\n"
1325                                 << "\n"
1326                                 ;
1327                 }
1328         }
1329         else
1330         {
1331                 shaderTemplateGlobalStream
1332                         << "layout (buffer_reference) buffer AtomicBuffer {\n"
1333                         << "    AtomicStruct data;\n"
1334                         << "};\n"
1335                         << "\n"
1336                         << "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
1337                         << "    AtomicBuffer buf;\n"
1338                         << "};\n"
1339                         << "\n"
1340                         ;
1341         }
1342
1343         const auto                                      shaderTemplateGlobalString      = shaderTemplateGlobalStream.str();
1344         const tcu::StringTemplate       shaderTemplateGlobal            (shaderTemplateGlobalString);
1345
1346         // Shader body for the non-vertex case.
1347         std::ostringstream nonVertexShaderTemplateStream;
1348
1349         if (memoryType == AtomicMemoryType::SHARED)
1350         {
1351                 // Invocation zero will initialize the shared structure from the descriptor set.
1352                 nonVertexShaderTemplateStream
1353                         << "if (gl_LocalInvocationIndex == 0u)\n"
1354                         << "{\n"
1355                         << "    buf.data = ${RESULT_BUFFER_NAME}.data;\n"
1356                         << "}\n"
1357                         << "barrier();\n"
1358                         ;
1359         }
1360
1361         if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
1362         {
1363                 nonVertexShaderTemplateStream
1364                         << "if (!gl_HelperInvocation) {\n"
1365                         << "    int idx = atomicAdd(buf.data.index, 1);\n"
1366                         << "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1367                         << "}\n"
1368                         ;
1369         }
1370         else
1371         {
1372                 nonVertexShaderTemplateStream
1373                         << "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
1374                         << "{\n"
1375                         << "    int idx = atomicAdd(buf.data.index, 1);\n"
1376                         << "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1377                         << "}\n"
1378                         ;
1379         }
1380
1381         if (memoryType == AtomicMemoryType::SHARED)
1382         {
1383                 // Invocation zero will copy results back to the descriptor set.
1384                 nonVertexShaderTemplateStream
1385                         << "barrier();\n"
1386                         << "if (gl_LocalInvocationIndex == 0u)\n"
1387                         << "{\n"
1388                         << "    ${RESULT_BUFFER_NAME}.data = buf.data;\n"
1389                         << "}\n"
1390                         ;
1391         }
1392
1393         const auto                                      nonVertexShaderTemplateStreamStr        = nonVertexShaderTemplateStream.str();
1394         const tcu::StringTemplate       nonVertexShaderTemplateSrc                      (nonVertexShaderTemplateStreamStr);
1395
1396         // Shader body for the vertex case.
1397         const tcu::StringTemplate vertexShaderTemplateSrc(
1398                 "int idx = gl_VertexIndex;\n"
1399                 "if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
1400                 "{\n"
1401                 "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1402                 "}\n");
1403
1404         // Extensions.
1405         std::ostringstream extensions;
1406
1407         if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1408         {
1409                 extensions
1410                         << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
1411                         << "#extension GL_EXT_shader_atomic_int64 : enable\n"
1412                         ;
1413         }
1414         else if ((m_dataType == DATA_TYPE_FLOAT16) || (m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
1415         {
1416                 extensions
1417                         << "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable\n"
1418                         << "#extension GL_EXT_shader_atomic_float : enable\n"
1419                         << "#extension GL_EXT_shader_atomic_float2 : enable\n"
1420                         << "#extension GL_KHR_memory_scope_semantics : enable\n"
1421                         ;
1422         }
1423
1424         if (memoryType == AtomicMemoryType::REFERENCE)
1425         {
1426                 extensions << "#extension GL_EXT_buffer_reference : require\n";
1427         }
1428
1429         // Specializations.
1430         std::map<std::string, std::string> specializations;
1431
1432         specializations["EXTENSIONS"]                   = extensions.str();
1433         specializations["DATATYPE"]                             = dataType2Str(m_dataType);
1434         specializations["ATOMICOP"]                             = atomicOp2Str(m_atomicOp);
1435         specializations["SETIDX"]                               = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
1436         specializations["N"]                                    = de::toString((int)NUM_ELEMENTS);
1437         specializations["COMPARE_ARG"]                  = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
1438         specializations["RESULT_BUFFER_NAME"]   = ((memoryType == AtomicMemoryType::SHARED) ? "result" : "buf");
1439
1440         // Shader spec.
1441         m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1442         m_shaderSpec.glslVersion                = glu::GLSL_VERSION_450;
1443         m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
1444         m_shaderSpec.source                             = ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
1445                                                                                 ? vertexShaderTemplateSrc.specialize(specializations)
1446                                                                                 : nonVertexShaderTemplateSrc.specialize(specializations));
1447
1448         if (memoryType == AtomicMemoryType::SHARED)
1449         {
1450                 // When using global shared memory, use a single workgroup and an appropriate number of local invocations.
1451                 m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
1452         }
1453 }
1454
1455 void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
1456 {
1457         tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();
1458
1459         static const struct
1460         {
1461                 glu::ShaderType         type;
1462                 const char*                     name;
1463         } shaderTypes[] =
1464         {
1465                 { glu::SHADERTYPE_VERTEX,                                                       "vertex"                        },
1466                 { glu::SHADERTYPE_FRAGMENT,                                                     "fragment"                      },
1467                 { glu::SHADERTYPE_GEOMETRY,                                                     "geometry"                      },
1468                 { glu::SHADERTYPE_TESSELLATION_CONTROL,                         "tess_ctrl"                     },
1469                 { glu::SHADERTYPE_TESSELLATION_EVALUATION,                      "tess_eval"                     },
1470                 { glu::SHADERTYPE_COMPUTE,                                                      "compute"                       },
1471         };
1472
1473         static const struct
1474         {
1475                 AtomicMemoryType        type;
1476                 const char*                     suffix;
1477         } kMemoryTypes[] =
1478         {
1479                 { AtomicMemoryType::BUFFER,             ""                              },
1480                 { AtomicMemoryType::SHARED,             "_shared"               },
1481                 { AtomicMemoryType::REFERENCE,  "_reference"    },
1482         };
1483
1484         static const struct
1485         {
1486                 DataType                dataType;
1487                 const char*             name;
1488                 const char*             description;
1489         } dataSign[] =
1490         {
1491                 { DATA_TYPE_FLOAT16,"float16",                  "Tests using 16-bit float data"                         },
1492                 { DATA_TYPE_INT32,      "signed",                       "Tests using signed data (int)"                         },
1493                 { DATA_TYPE_UINT32,     "unsigned",                     "Tests using unsigned data (uint)"                      },
1494                 { DATA_TYPE_FLOAT32,"float32",                  "Tests using 32-bit float data"                         },
1495                 { DATA_TYPE_INT64,      "signed64bit",          "Tests using 64 bit signed data (int64)"        },
1496                 { DATA_TYPE_UINT64,     "unsigned64bit",        "Tests using 64 bit unsigned data (uint64)"     },
1497                 { DATA_TYPE_FLOAT64,"float64",                  "Tests using 64-bit float data)"                        }
1498         };
1499
1500         static const struct
1501         {
1502                 AtomicOperation         value;
1503                 const char*                     name;
1504         } atomicOp[] =
1505         {
1506                 { ATOMIC_OP_EXCHANGE,   "exchange"      },
1507                 { ATOMIC_OP_COMP_SWAP,  "comp_swap"     },
1508                 { ATOMIC_OP_ADD,                "add"           },
1509                 { ATOMIC_OP_MIN,                "min"           },
1510                 { ATOMIC_OP_MAX,                "max"           },
1511                 { ATOMIC_OP_AND,                "and"           },
1512                 { ATOMIC_OP_OR,                 "or"            },
1513                 { ATOMIC_OP_XOR,                "xor"           }
1514         };
1515
1516         for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
1517         {
1518                 for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
1519                 {
1520                         for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
1521                         {
1522                                 // Only ADD and EXCHANGE are supported on floating-point
1523                                 if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT16 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
1524                                 {
1525                                         if (atomicOp[opNdx].value != ATOMIC_OP_ADD &&
1526                                             atomicOp[opNdx].value != ATOMIC_OP_MIN &&
1527                                             atomicOp[opNdx].value != ATOMIC_OP_MAX &&
1528                                             atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
1529                                         {
1530                                                 continue;
1531                                         }
1532                                 }
1533
1534                                 for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
1535                                 {
1536                                         // Shared memory only available in compute shaders.
1537                                         if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE)
1538                                                 continue;
1539
1540                                         const std::string description   = std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
1541                                         const std::string name                  = std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;
1542
1543                                         atomicOperationTestsGroup->addChild(new AtomicOperationCase(testCtx, name.c_str(), description.c_str(), AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type), dataSign[signNdx].dataType, atomicOp[opNdx].value));
1544                                 }
1545                         }
1546                 }
1547         }
1548 }
1549
1550 } // anonymous
1551
1552 tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
1553 {
1554         return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);
1555 }
1556
1557 } // shaderexecutor
1558 } // vkt