VK-GL-CTS: external/vulkancts/modules/vulkan/shaderexecutor/vktAtomicOperationTests.cpp
/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2017 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Atomic operations (OpAtomic*) tests.
 *//*--------------------------------------------------------------------*/

#include "vktAtomicOperationTests.hpp"
#include "vktShaderExecutor.hpp"

#include "vkRefUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vktTestGroupUtil.hpp"

#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuResultCollector.hpp"

#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deRandom.hpp"
#include "deArrayUtil.hpp"

#include <string>
#include <memory>
#include <cmath>

namespace vkt
{
namespace shaderexecutor
{

namespace
{

using de::UniquePtr;
using de::MovePtr;
using std::vector;

using namespace vk;

enum class AtomicMemoryType
{
	BUFFER = 0,	// Normal buffer.
	SHARED,		// Shared global struct in a compute workgroup.
	REFERENCE,	// Buffer passed as a reference.
};

// Helper class to indicate the shader type and which memory the atomic operations work on.
class AtomicShaderType
{
public:
	AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
		: m_type				(type)
		, m_atomicMemoryType	(memoryType)
	{
		// Shared memory can only be used with compute shaders.
		DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE);
	}

	glu::ShaderType		getType			(void) const	{ return m_type; }
	AtomicMemoryType	getMemoryType	(void) const	{ return m_atomicMemoryType; }

private:
	glu::ShaderType		m_type;
	AtomicMemoryType	m_atomicMemoryType;
};

// Buffer helper
class Buffer
{
public:
				Buffer		(Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);

	VkBuffer	getBuffer	(void) const { return *m_buffer;					}
	void*		getHostPtr	(void) const { return m_allocation->getHostPtr();	}
	void		flush		(void);
	void		invalidate	(void);

private:
	const DeviceInterface&		m_vkd;
	const VkDevice				m_device;
	const VkQueue				m_queue;
	const deUint32				m_queueIndex;
	const Unique<VkBuffer>		m_buffer;
	const UniquePtr<Allocation>	m_allocation;
};

typedef de::SharedPtr<Buffer> BufferSp;

Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
{
	const VkBufferCreateInfo createInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
		DE_NULL,
		(VkBufferCreateFlags)0,
		size,
		usageFlags,
		VK_SHARING_MODE_EXCLUSIVE,
		0u,
		DE_NULL
	};
	return createBuffer(vkd, device, &createInfo);
}

MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
{
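	// Buffers that will be accessed through a buffer reference need memory allocated with the device-address flag,
	// so request MemoryRequirement::DeviceAddress in addition to host visibility in that case.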
	const MemoryRequirement	allocationType	= (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
	MovePtr<Allocation>		alloc			(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));

	VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));

	return alloc;
}

Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
	: m_vkd			(context.getDeviceInterface())
	, m_device		(context.getDevice())
	, m_queue		(context.getUniversalQueue())
	, m_queueIndex	(context.getUniversalQueueFamilyIndex())
	, m_buffer		(createBuffer			(context.getDeviceInterface(),
											 context.getDevice(),
											 (VkDeviceSize)size,
											 usage))
	, m_allocation	(allocateAndBindMemory	(context.getDeviceInterface(),
											 context.getDevice(),
											 context.getDefaultAllocator(),
											 *m_buffer,
											 useRef))
{
}

void Buffer::flush (void)
{
	flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

void Buffer::invalidate (void)
{
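	// Make device writes visible to the host: submit a one-shot command buffer with a memory-write to host-read
	// barrier before invalidating the mapped range below.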
	const auto	cmdPool			= vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
	const auto	cmdBufferPtr	= vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
	const auto	cmdBuffer		= cmdBufferPtr.get();
	const auto	bufferBarrier	= vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);

	beginCommandBuffer(m_vkd, cmdBuffer);
	m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
	endCommandBuffer(m_vkd, cmdBuffer);
	submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);

	invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

// Tests

enum AtomicOperation
{
	ATOMIC_OP_EXCHANGE = 0,
	ATOMIC_OP_COMP_SWAP,
	ATOMIC_OP_ADD,
	ATOMIC_OP_MIN,
	ATOMIC_OP_MAX,
	ATOMIC_OP_AND,
	ATOMIC_OP_OR,
	ATOMIC_OP_XOR,

	ATOMIC_OP_LAST
};

std::string atomicOp2Str (AtomicOperation op)
{
	static const char* const s_names[] =
	{
		"atomicExchange",
		"atomicCompSwap",
		"atomicAdd",
		"atomicMin",
		"atomicMax",
		"atomicAnd",
		"atomicOr",
		"atomicXor"
	};
	return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
}

enum
{
	NUM_ELEMENTS = 32
};

enum DataType
{
	DATA_TYPE_INT32 = 0,
	DATA_TYPE_UINT32,
	DATA_TYPE_FLOAT32,
	DATA_TYPE_INT64,
	DATA_TYPE_UINT64,
	DATA_TYPE_FLOAT64,

	DATA_TYPE_LAST
};

std::string dataType2Str (DataType type)
{
	static const char* const s_names[] =
	{
		"int",
		"uint",
		"float",
		"int64_t",
		"uint64_t",
		"double",
	};
	return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
}

class BufferInterface
{
public:
	virtual void setBuffer(void* ptr) = 0;

	virtual size_t bufferSize() = 0;

	virtual void fillWithTestData(de::Random& rnd) = 0;

	virtual void checkResults(tcu::ResultCollector& resultCollector) = 0;

	virtual ~BufferInterface() {}
};

template<typename dataTypeT>
class TestBuffer : public BufferInterface
{
public:

	TestBuffer(AtomicOperation atomicOp)
		: m_atomicOp(atomicOp)
	{}

	template<typename T>
	struct BufferData
	{
		// Use half the number of elements for inout to cause overlap between atomic operations.
		// Each inout element at index i will have two atomic operations using input from
		// indices i and i + NUM_ELEMENTS / 2.
		T		inout[NUM_ELEMENTS / 2];
		T		input[NUM_ELEMENTS];
		T		compare[NUM_ELEMENTS];
		T		output[NUM_ELEMENTS];
		T		invocationHitCount[NUM_ELEMENTS];
		deInt32	index;
	};

	virtual void setBuffer(void* ptr)
	{
		m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
	}

	virtual size_t bufferSize()
	{
		return sizeof(BufferData<dataTypeT>);
	}

	virtual void fillWithTestData(de::Random& rnd)
	{
		dataTypeT pattern;
		deMemset(&pattern, 0xcd, sizeof(dataTypeT));

		for (int i = 0; i < NUM_ELEMENTS / 2; i++)
		{
			m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
			// The first half of compare elements match with every even index.
			// The second half matches with odd indices. This causes the
			// overlapping operations to only select one.
			m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
			m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
		}
		for (int i = 0; i < NUM_ELEMENTS; i++)
		{
			m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
			m_ptr->output[i] = pattern;
			m_ptr->invocationHitCount[i] = 0;
		}
		m_ptr->index = 0;

		// Take a copy to be used when calculating expected values.
		m_original = *m_ptr;
	}

	virtual void checkResults(tcu::ResultCollector& resultCollector)
	{
		checkOperation(m_original, *m_ptr, resultCollector);
	}

	template<typename T>
	struct Expected
	{
		T m_inout;
		T m_output[2];

		Expected (T inout, T output0, T output1)
			: m_inout(inout)
		{
			m_output[0] = output0;
			m_output[1] = output1;
		}

		bool compare (T inout, T output0, T output1)
		{
			return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
					&& deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
					&& deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
		}
	};

	void checkOperation	(const BufferData<dataTypeT>&	original,
						 const BufferData<dataTypeT>&	result,
						 tcu::ResultCollector&			resultCollector);

	const AtomicOperation	m_atomicOp;

	BufferData<dataTypeT>*	m_ptr;
	BufferData<dataTypeT>	m_original;

};

template<typename dataTypeT>
class TestBufferFloatingPoint : public BufferInterface
{
public:

	TestBufferFloatingPoint(AtomicOperation atomicOp)
		: m_atomicOp(atomicOp)
	{}

	template<typename T>
	struct BufferDataFloatingPoint
	{
		// Use half the number of elements for inout to cause overlap between atomic operations.
		// Each inout element at index i will have two atomic operations using input from
		// indices i and i + NUM_ELEMENTS / 2.
		T		inout[NUM_ELEMENTS / 2];
		T		input[NUM_ELEMENTS];
		T		compare[NUM_ELEMENTS];
		T		output[NUM_ELEMENTS];
		T		invocationHitCount[NUM_ELEMENTS];
		deInt32	index;
	};

	virtual void setBuffer(void* ptr)
	{
		m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
	}

	virtual size_t bufferSize()
	{
		return sizeof(BufferDataFloatingPoint<dataTypeT>);
	}

	virtual void fillWithTestData(de::Random& rnd)
	{
		dataTypeT pattern;
		deMemset(&pattern, 0xcd, sizeof(dataTypeT));

		for (int i = 0; i < NUM_ELEMENTS / 2; i++)
		{
			m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getFloat());
			// The first half of compare elements match with every even index.
			// The second half matches with odd indices. This causes the
			// overlapping operations to only select one.
			m_ptr->compare[i] = m_ptr->inout[i] + (dataTypeT)(i % 2);
			m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + (dataTypeT)(1 - (i % 2));
		}
		for (int i = 0; i < NUM_ELEMENTS; i++)
		{
			m_ptr->input[i] = static_cast<dataTypeT>(rnd.getFloat());
			m_ptr->output[i] = pattern;
			m_ptr->invocationHitCount[i] = 0;
		}
		m_ptr->index = 0;

		// Take a copy to be used when calculating expected values.
		m_original = *m_ptr;
	}

	virtual void checkResults(tcu::ResultCollector& resultCollector)
	{
		checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
	}

	template<typename T>
	struct Expected
	{
		T m_inout;
		T m_output[2];

		Expected(T inout, T output0, T output1)
			: m_inout(inout)
		{
			m_output[0] = output0;
			m_output[1] = output1;
		}

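		// Floating-point addition is not associative, so the host-computed expected values can differ slightly
		// from the device results depending on operation order; compare with a small tolerance instead of the
		// bit-exact memcmp used for integers.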
		bool compare(T inout, T output0, T output1)
		{
			T diff1 = static_cast<T>(fabs(m_inout - inout));
			T diff2 = static_cast<T>(fabs(m_output[0] - output0));
			T diff3 = static_cast<T>(fabs(m_output[1] - output1));
			const T epsilon = static_cast<T>(0.00001);
			return (diff1 < epsilon) && (diff2 < epsilon) && (diff3 < epsilon);
		}
	};

	void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT>&	original,
									 const BufferDataFloatingPoint<dataTypeT>&	result,
									 tcu::ResultCollector&						resultCollector);

	const AtomicOperation	m_atomicOp;

	BufferDataFloatingPoint<dataTypeT>*	m_ptr;
	BufferDataFloatingPoint<dataTypeT>	m_original;

};

static BufferInterface* createTestBuffer(DataType type, AtomicOperation atomicOp)
{
	switch (type)
	{
	case DATA_TYPE_INT32:
		return new TestBuffer<deInt32>(atomicOp);
	case DATA_TYPE_UINT32:
		return new TestBuffer<deUint32>(atomicOp);
	case DATA_TYPE_FLOAT32:
		return new TestBufferFloatingPoint<float>(atomicOp);
	case DATA_TYPE_INT64:
		return new TestBuffer<deInt64>(atomicOp);
	case DATA_TYPE_UINT64:
		return new TestBuffer<deUint64>(atomicOp);
	case DATA_TYPE_FLOAT64:
		return new TestBufferFloatingPoint<double>(atomicOp);
	default:
		DE_ASSERT(false);
		return DE_NULL;
	}
}

// Use template to handle both signed and unsigned cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBuffer<T>::checkOperation (const BufferData<T>&	original,
									const BufferData<T>&	result,
									tcu::ResultCollector&	resultCollector)
{
	// originalInout = original inout
	// input0 = input at index i
	// input1 = input at index i + NUM_ELEMENTS / 2
	//
	// The atomic operation returns the memory contents before the
	// operation, and this is stored as output. Two operations are
	// executed for each InOut value (using input0 and input1).
	//
	// Since two operations overlap on each InOut element, the resulting
	// InOut value and the outputs of the operations have two result
	// candidates depending on the execution order. Verification passes
	// if the results match one of these options.
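	//
	// For example, with atomicAdd, originalInout = 1, input0 = 2 and input1 = 3:
	// - order A: output0 = 1, inout becomes 3, then output1 = 3, inout becomes 6 -> (6, 1, 3);
	// - order B: output1 = 1, inout becomes 4, then output0 = 4, inout becomes 6 -> (6, 4, 1).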

	for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
	{
		// Needed when reinterpreting the data as signed values.
		const T originalInout	= *reinterpret_cast<const T*>(&original.inout[elementNdx]);
		const T input0			= *reinterpret_cast<const T*>(&original.input[elementNdx]);
		const T input1			= *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

		// Expected results are collected to this vector.
		vector<Expected<T> > exp;

		switch (m_atomicOp)
		{
			case ATOMIC_OP_ADD:
			{
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
			}
			break;

			case ATOMIC_OP_AND:
			{
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
			}
			break;

			case ATOMIC_OP_OR:
			{
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
			}
			break;

			case ATOMIC_OP_XOR:
			{
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
			}
			break;

			case ATOMIC_OP_MIN:
			{
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_MAX:
			{
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_EXCHANGE:
			{
				exp.push_back(Expected<T>(input1, originalInout, input0));
				exp.push_back(Expected<T>(input0, input1, originalInout));
			}
			break;

			case ATOMIC_OP_COMP_SWAP:
			{
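				// The compare values were set up in fillWithTestData so that exactly one of the two swaps can
				// succeed: for even indices compare[i] matches the original inout value (input0 wins), for odd
				// indices compare[i + NUM_ELEMENTS / 2] matches it (input1 wins). The losing swap observes either
				// the original value (if it ran first) or the winner's result.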
				if (elementNdx % 2 == 0)
				{
					exp.push_back(Expected<T>(input0, originalInout, input0));
					exp.push_back(Expected<T>(input0, originalInout, originalInout));
				}
				else
				{
					exp.push_back(Expected<T>(input1, input1, originalInout));
					exp.push_back(Expected<T>(input1, originalInout, originalInout));
				}
			}
			break;

			default:
				DE_FATAL("Unexpected atomic operation.");
				break;
		}

		const T resIo		= result.inout[elementNdx];
		const T resOutput0	= result.output[elementNdx];
		const T resOutput1	= result.output[elementNdx + NUM_ELEMENTS / 2];

		if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
		{
			std::ostringstream errorMessage;
			errorMessage	<< "ERROR: Result value check failed at index " << elementNdx
							<< ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
							<< ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
							<< tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
							<< " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";

			resultCollector.fail(errorMessage.str());
		}
	}
}

// Use template to handle both float and double cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBufferFloatingPoint<T>::checkOperationFloatingPoint(const BufferDataFloatingPoint<T>&	original,
															 const BufferDataFloatingPoint<T>&	result,
															 tcu::ResultCollector&				resultCollector)
{
	// originalInout = original inout
	// input0 = input at index i
	// input1 = input at index i + NUM_ELEMENTS / 2
	//
	// The atomic operation returns the memory contents before the
	// operation, and this is stored as output. Two operations are
	// executed for each InOut value (using input0 and input1).
	//
	// Since two operations overlap on each InOut element, the resulting
	// InOut value and the outputs of the operations have two result
	// candidates depending on the execution order. Verification passes
	// if the results match one of these options.

	for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
	{
		const T originalInout	= *reinterpret_cast<const T*>(&original.inout[elementNdx]);
		const T input0			= *reinterpret_cast<const T*>(&original.input[elementNdx]);
		const T input1			= *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

		// Expected results are collected to this vector.
		vector<Expected<T> > exp;

		switch (m_atomicOp)
		{
		case ATOMIC_OP_ADD:
		{
			exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
			exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
		}
		break;

		case ATOMIC_OP_EXCHANGE:
		{
			exp.push_back(Expected<T>(input1, originalInout, input0));
			exp.push_back(Expected<T>(input0, input1, originalInout));
		}
		break;

		default:
			DE_FATAL("Unexpected atomic operation.");
			break;
		}

		const T resIo		= result.inout[elementNdx];
		const T resOutput0	= result.output[elementNdx];
		const T resOutput1	= result.output[elementNdx + NUM_ELEMENTS / 2];

		if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
		{
			std::ostringstream errorMessage;
			errorMessage << "ERROR: Result value check failed at index " << elementNdx
				<< ". Expected one of the two outcomes: InOut = " << exp[0].m_inout
				<< ", Output0 = " << exp[0].m_output[0] << ", Output1 = "
				<< exp[0].m_output[1] << ", or InOut = " << exp[1].m_inout
				<< ", Output0 = " << exp[1].m_output[0] << ", Output1 = "
				<< exp[1].m_output[1] << ". Got: InOut = " << resIo
				<< ", Output0 = " << resOutput0 << ", Output1 = "
				<< resOutput1 << ". Using Input0 = " << original.input[elementNdx]
				<< " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";

			resultCollector.fail(errorMessage.str());
		}
	}
}

class AtomicOperationCaseInstance : public TestInstance
{
public:
							AtomicOperationCaseInstance	(Context&			context,
														 const ShaderSpec&	shaderSpec,
														 AtomicShaderType	shaderType,
														 DataType			dataType,
														 AtomicOperation	atomicOp);

	virtual tcu::TestStatus	iterate						(void);

private:
	const ShaderSpec&		m_shaderSpec;
	AtomicShaderType		m_shaderType;
	const DataType			m_dataType;
	AtomicOperation			m_atomicOp;

};

AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context&			context,
														  const ShaderSpec&	shaderSpec,
														  AtomicShaderType	shaderType,
														  DataType			dataType,
														  AtomicOperation	atomicOp)
	: TestInstance	(context)
	, m_shaderSpec	(shaderSpec)
	, m_shaderType	(shaderType)
	, m_dataType	(dataType)
	, m_atomicOp	(atomicOp)
{
}

tcu::TestStatus AtomicOperationCaseInstance::iterate (void)
{
	de::UniquePtr<BufferInterface>	testBuffer	(createTestBuffer(m_dataType, m_atomicOp));
	tcu::TestLog&					log			= m_context.getTestContext().getLog();
	const DeviceInterface&			vkd			= m_context.getDeviceInterface();
	const VkDevice					device		= m_context.getDevice();
	de::Random						rnd			(0x62a15e34);
	const bool						useRef		= (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
	const VkDescriptorType			descType	= (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	const VkBufferUsageFlags		usageFlags	= (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));

	// The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
	// a uniform buffer. If not, it will be passed directly as a descriptor.
	Buffer							buffer		(m_context, usageFlags, testBuffer->bufferSize(), useRef);
	std::unique_ptr<Buffer>			auxBuffer;

	if (useRef)
	{
		// Pass the main buffer address inside a uniform buffer.
		const VkBufferDeviceAddressInfo addressInfo =
		{
			VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	//	VkStructureType	sType;
			nullptr,										//	const void*		pNext;
			buffer.getBuffer(),								//	VkBuffer		buffer;
		};
		const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);

		auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
		deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
		auxBuffer->flush();
	}

	testBuffer->setBuffer(buffer.getHostPtr());
	testBuffer->fillWithTestData(rnd);

	buffer.flush();

	Move<VkDescriptorSetLayout>	extraResourcesLayout;
	Move<VkDescriptorPool>		extraResourcesSetPool;
	Move<VkDescriptorSet>		extraResourcesSet;

	const VkDescriptorSetLayoutBinding bindings[] =
	{
		{ 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
	};

	const VkDescriptorSetLayoutCreateInfo layoutInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		DE_NULL,
		(VkDescriptorSetLayoutCreateFlags)0u,
		DE_LENGTH_OF_ARRAY(bindings),
		bindings
	};

	extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);

	const VkDescriptorPoolSize poolSizes[] =
	{
		{ descType, 1u }
	};

	const VkDescriptorPoolCreateInfo poolInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
		DE_NULL,
		(VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
		1u,		// maxSets
		DE_LENGTH_OF_ARRAY(poolSizes),
		poolSizes
	};

	extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*extraResourcesSetPool,
		1u,
		&extraResourcesLayout.get()
	};

	extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);

	VkDescriptorBufferInfo bufferInfo;
	bufferInfo.buffer	= (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
	bufferInfo.offset	= 0u;
	bufferInfo.range	= VK_WHOLE_SIZE;

	const VkWriteDescriptorSet descriptorWrite =
	{
		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		DE_NULL,
		*extraResourcesSet,
		0u,		// dstBinding
		0u,		// dstArrayElement
		1u,
		descType,
		(const VkDescriptorImageInfo*)DE_NULL,
		&bufferInfo,
		(const VkBufferView*)DE_NULL
	};

	vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);

	// Storage for output varying data.
	std::vector<deUint32>	outputs		(NUM_ELEMENTS);
	std::vector<void*>		outputPtr	(NUM_ELEMENTS);

	for (size_t i = 0; i < NUM_ELEMENTS; i++)
	{
		outputs[i] = 0xcdcdcdcd;
		outputPtr[i] = &outputs[i];
	}

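	// Shared memory variants run every invocation inside a single workgroup (createShaderSpec sets localSizeX to
	// NUM_ELEMENTS for them); the other variants request one execution per element.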
	const int					numWorkGroups	= ((m_shaderType.getMemoryType() == AtomicMemoryType::SHARED) ? 1 : static_cast<int>(NUM_ELEMENTS));
	UniquePtr<ShaderExecutor>	executor		(createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));

	executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
	buffer.invalidate();

	tcu::ResultCollector resultCollector(log);

	// Check the results of the atomic operation.
	testBuffer->checkResults(resultCollector);

	return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
}

class AtomicOperationCase : public TestCase
{
public:
							AtomicOperationCase		(tcu::TestContext&	testCtx,
													 const char*		name,
													 const char*		description,
													 AtomicShaderType	type,
													 DataType			dataType,
													 AtomicOperation	atomicOp);
	virtual					~AtomicOperationCase	(void);

	virtual TestInstance*	createInstance			(Context& ctx) const;
	virtual void			checkSupport			(Context& ctx) const;
	virtual void			initPrograms			(vk::SourceCollections& programCollection) const
	{
		generateSources(m_shaderType.getType(), m_shaderSpec, programCollection);
	}

private:

	void					createShaderSpec		();
	ShaderSpec				m_shaderSpec;
	const AtomicShaderType	m_shaderType;
	const DataType			m_dataType;
	const AtomicOperation	m_atomicOp;
};

AtomicOperationCase::AtomicOperationCase (tcu::TestContext&	testCtx,
										  const char*		name,
										  const char*		description,
										  AtomicShaderType	shaderType,
										  DataType			dataType,
										  AtomicOperation	atomicOp)
	: TestCase		(testCtx, name, description)
	, m_shaderType	(shaderType)
	, m_dataType	(dataType)
	, m_atomicOp	(atomicOp)
{
	createShaderSpec();
	init();
}

AtomicOperationCase::~AtomicOperationCase (void)
{
}

TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
{
	return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
}

void AtomicOperationCase::checkSupport (Context& ctx) const
{
	if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
	{
		ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");

		const auto atomicInt64Features	= ctx.getShaderAtomicInt64Features();
		const bool isSharedMemory		= (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED);

		if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
		}
		if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
		}
	}

	if (m_dataType == DATA_TYPE_FLOAT32)
	{
		ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
		if (m_atomicOp == ATOMIC_OP_ADD)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_EXCHANGE)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
				}
			}
		}
	}

	if (m_dataType == DATA_TYPE_FLOAT64)
	{
		ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
		if (m_atomicOp == ATOMIC_OP_ADD)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_EXCHANGE)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
				}
			}
		}
	}

	if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
	{
		ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
	}

	// Check stores and atomic operation support.
	switch (m_shaderType.getType())
	{
	case glu::SHADERTYPE_VERTEX:
	case glu::SHADERTYPE_TESSELLATION_CONTROL:
	case glu::SHADERTYPE_TESSELLATION_EVALUATION:
	case glu::SHADERTYPE_GEOMETRY:
		if (!ctx.getDeviceFeatures().vertexPipelineStoresAndAtomics)
			TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in vertex, tessellation and geometry shaders.");
		break;
	case glu::SHADERTYPE_FRAGMENT:
		if (!ctx.getDeviceFeatures().fragmentStoresAndAtomics)
			TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in fragment shaders.");
		break;
	case glu::SHADERTYPE_COMPUTE:
		break;
	default:
		DE_FATAL("Unsupported shader type");
	}

	checkSupportShader(ctx, m_shaderType.getType());
}

void AtomicOperationCase::createShaderSpec (void)
{
	const AtomicMemoryType memoryType = m_shaderType.getMemoryType();

	// Global declarations.
	std::ostringstream shaderTemplateGlobalStream;

	// Structure in use for atomic operations.
	shaderTemplateGlobalStream
		<< "${EXTENSIONS}\n"
		<< "\n"
		<< "struct AtomicStruct\n"
		<< "{\n"
		<< "    ${DATATYPE} inoutValues[${N}/2];\n"
		<< "    ${DATATYPE} inputValues[${N}];\n"
		<< "    ${DATATYPE} compareValues[${N}];\n"
		<< "    ${DATATYPE} outputValues[${N}];\n"
		<< "    int invocationHitCount[${N}];\n"
		<< "    int index;\n"
		<< "};\n"
		<< "\n"
		;

	// The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
	// as "buf.data", which is the name used in the atomic operation statements.
	//
	// * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
	// * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
	// * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
	//
	if (memoryType != AtomicMemoryType::REFERENCE)
	{
		shaderTemplateGlobalStream
			<< "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
			<< "    AtomicStruct data;\n"
			<< "} ${RESULT_BUFFER_NAME};\n"
			<< "\n"
			;

		// When using global shared memory in the compute variant, invocations will use a shared global structure instead of a
		// descriptor set as the sources and results of each tested operation.
		if (memoryType == AtomicMemoryType::SHARED)
		{
			shaderTemplateGlobalStream
				<< "shared struct { AtomicStruct data; } buf;\n"
				<< "\n"
				;
		}
	}
	else
	{
		shaderTemplateGlobalStream
			<< "layout (buffer_reference) buffer AtomicBuffer {\n"
			<< "    AtomicStruct data;\n"
			<< "};\n"
			<< "\n"
			<< "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
			<< "    AtomicBuffer buf;\n"
			<< "};\n"
			<< "\n"
			;
	}

	const auto					shaderTemplateGlobalString	= shaderTemplateGlobalStream.str();
	const tcu::StringTemplate	shaderTemplateGlobal		(shaderTemplateGlobalString);

	// Shader body for the non-vertex case.
	std::ostringstream nonVertexShaderTemplateStream;

	if (memoryType == AtomicMemoryType::SHARED)
	{
		// Invocation zero will initialize the shared structure from the descriptor set.
		nonVertexShaderTemplateStream
			<< "if (gl_LocalInvocationIndex == 0u)\n"
			<< "{\n"
			<< "    buf.data = ${RESULT_BUFFER_NAME}.data;\n"
			<< "}\n"
			<< "barrier();\n"
			;
	}

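	// Fragment shaders guard against helper invocations so that each output slot is claimed by exactly one real
	// invocation.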
	if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
	{
		nonVertexShaderTemplateStream
			<< "if (!gl_HelperInvocation) {\n"
			<< "    int idx = atomicAdd(buf.data.index, 1);\n"
			<< "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
			<< "}\n"
			;
	}
	else
	{
		nonVertexShaderTemplateStream
			<< "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
			<< "{\n"
			<< "    int idx = atomicAdd(buf.data.index, 1);\n"
			<< "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
			<< "}\n"
			;
	}

	if (memoryType == AtomicMemoryType::SHARED)
	{
		// Invocation zero will copy results back to the descriptor set.
		nonVertexShaderTemplateStream
			<< "barrier();\n"
			<< "if (gl_LocalInvocationIndex == 0u)\n"
			<< "{\n"
			<< "    ${RESULT_BUFFER_NAME}.data = buf.data;\n"
			<< "}\n"
			;
	}

	const auto					nonVertexShaderTemplateStreamStr	= nonVertexShaderTemplateStream.str();
	const tcu::StringTemplate	nonVertexShaderTemplateSrc			(nonVertexShaderTemplateStreamStr);

	// Shader body for the vertex case.
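	// A vertex may be shaded more than once, so the hit-count guard below ensures each index performs its atomic
	// operation exactly once.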
	const tcu::StringTemplate vertexShaderTemplateSrc(
		"int idx = gl_VertexIndex;\n"
		"if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
		"{\n"
		"    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
		"}\n");
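	// For illustration, with a plain buffer, ${ATOMICOP} = atomicAdd, ${N} = 32, an empty ${COMPARE_ARG} and
	// ${RESULT_BUFFER_NAME} = "buf", the non-vertex body above specializes roughly to:
	//
	//     if (atomicAdd(buf.data.invocationHitCount[0], 1) < 32)
	//     {
	//         int idx = atomicAdd(buf.data.index, 1);
	//         buf.data.outputValues[idx] = atomicAdd(buf.data.inoutValues[idx % (32/2)], buf.data.inputValues[idx]);
	//     }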
1130
1131         // Extensions.
1132         std::ostringstream extensions;
1133
1134         if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1135         {
1136                 extensions
1137                         << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
1138                         << "#extension GL_EXT_shader_atomic_int64 : enable\n"
1139                         ;
1140         }
1141         else if ((m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
1142         {
1143                 extensions
1144                         << "#extension GL_EXT_shader_atomic_float : enable\n"
1145                         << "#extension GL_KHR_memory_scope_semantics : enable\n"
1146                         ;
1147         }
1148
1149         if (memoryType == AtomicMemoryType::REFERENCE)
1150         {
1151                 extensions << "#extension GL_EXT_buffer_reference : require\n";
1152         }
1153

    // Specializations.
    std::map<std::string, std::string> specializations;

    specializations["EXTENSIONS"]         = extensions.str();
    specializations["DATATYPE"]           = dataType2Str(m_dataType);
    specializations["ATOMICOP"]           = atomicOp2Str(m_atomicOp);
    specializations["SETIDX"]             = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
    specializations["N"]                  = de::toString((int)NUM_ELEMENTS);
    specializations["COMPARE_ARG"]        = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
    specializations["RESULT_BUFFER_NAME"] = ((memoryType == AtomicMemoryType::SHARED) ? "result" : "buf");
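
    // COMPARE_ARG splices in the extra comparison operand that only compare-and-swap
    // takes. Assuming atomicOp2Str(ATOMIC_OP_COMP_SWAP) maps to GLSL's atomicCompSwap,
    // the template line expands to roughly:
    //     buf.data.outputValues[idx] = atomicCompSwap(buf.data.inoutValues[idx % (N/2)], buf.data.compareValues[idx], buf.data.inputValues[idx]);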

    // Shader spec.
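    // Note: "outData" appears to be a placeholder output for the shader executor
    // framework; the atomic results themselves are read back from the SSBO.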
    m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
    m_shaderSpec.glslVersion        = glu::GLSL_VERSION_450;
    m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
    m_shaderSpec.source             = ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
                                        ? vertexShaderTemplateSrc.specialize(specializations)
                                        : nonVertexShaderTemplateSrc.specialize(specializations));

    if (memoryType == AtomicMemoryType::SHARED)
    {
        // When using global shared memory, use a single workgroup and an appropriate number of local invocations.
        m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
    }
}

void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
{
    tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();

    static const struct
    {
        glu::ShaderType type;
        const char*     name;
    } shaderTypes[] =
    {
        { glu::SHADERTYPE_VERTEX,                  "vertex"    },
        { glu::SHADERTYPE_FRAGMENT,                "fragment"  },
        { glu::SHADERTYPE_GEOMETRY,                "geometry"  },
        { glu::SHADERTYPE_TESSELLATION_CONTROL,    "tess_ctrl" },
        { glu::SHADERTYPE_TESSELLATION_EVALUATION, "tess_eval" },
        { glu::SHADERTYPE_COMPUTE,                 "compute"   },
    };

    static const struct
    {
        AtomicMemoryType type;
        const char*      suffix;
    } kMemoryTypes[] =
    {
        { AtomicMemoryType::BUFFER,    ""           },
        { AtomicMemoryType::SHARED,    "_shared"    },
        { AtomicMemoryType::REFERENCE, "_reference" },
    };

    static const struct
    {
        DataType    dataType;
        const char* name;
        const char* description;
    } dataSign[] =
    {
        { DATA_TYPE_INT32,   "signed",        "Tests using signed data (int)"             },
        { DATA_TYPE_UINT32,  "unsigned",      "Tests using unsigned data (uint)"          },
        { DATA_TYPE_FLOAT32, "float32",       "Tests using 32-bit float data"             },
        { DATA_TYPE_INT64,   "signed64bit",   "Tests using 64-bit signed data (int64)"    },
        { DATA_TYPE_UINT64,  "unsigned64bit", "Tests using 64-bit unsigned data (uint64)" },
        { DATA_TYPE_FLOAT64, "float64",       "Tests using 64-bit float data"             }
    };

    static const struct
    {
        AtomicOperation value;
        const char*     name;
    } atomicOp[] =
    {
        { ATOMIC_OP_EXCHANGE,  "exchange"  },
        { ATOMIC_OP_COMP_SWAP, "comp_swap" },
        { ATOMIC_OP_ADD,       "add"       },
        { ATOMIC_OP_MIN,       "min"       },
        { ATOMIC_OP_MAX,       "max"       },
        { ATOMIC_OP_AND,       "and"       },
        { ATOMIC_OP_OR,        "or"        },
        { ATOMIC_OP_XOR,       "xor"       }
    };

    for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
    {
        for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
        {
            for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
            {
                // Only ADD and EXCHANGE are supported for floating-point data types.
                if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
                {
                    if (atomicOp[opNdx].value != ATOMIC_OP_ADD && atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
                    {
                        continue;
                    }
                }

                for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
                {
                    // Shared memory is only available in compute shaders.
                    if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE)
                        continue;

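                    // Generated names combine operation, data sign, shader stage and
                    // memory-type suffix, e.g. "add_signed_compute_shared".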
                    const std::string description = std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
                    const std::string name        = std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;

                    atomicOperationTestsGroup->addChild(new AtomicOperationCase(
                        testCtx, name.c_str(), description.c_str(),
                        AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type),
                        dataSign[signNdx].dataType, atomicOp[opNdx].value));
                }
            }
        }
    }
}

} // anonymous

tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
{
    return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);
}

} // shaderexecutor
} // vkt