1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2015 The Khronos Group Inc.
6 * Copyright (c) 2017 Google Inc.
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * \brief Atomic operations (OpAtomic*) tests.
23 *//*--------------------------------------------------------------------*/
25 #include "vktAtomicOperationTests.hpp"
26 #include "vktShaderExecutor.hpp"
28 #include "vkRefUtil.hpp"
29 #include "vkMemUtil.hpp"
30 #include "vkQueryUtil.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vktTestGroupUtil.hpp"
36 #include "tcuTestLog.hpp"
37 #include "tcuStringTemplate.hpp"
38 #include "tcuResultCollector.hpp"
40 #include "deStringUtil.hpp"
41 #include "deSharedPtr.hpp"
42 #include "deRandom.hpp"
43 #include "deArrayUtil.hpp"
51 namespace shaderexecutor
// Kind of memory the tested atomic operations act on.
63 enum class AtomicMemoryType
65 BUFFER = 0, // Normal buffer.
66 SHARED, // Shared global struct in a compute workgroup.
67 REFERENCE, // Buffer passed as a reference.
70 // Helper struct to indicate the shader type and if it should use shared global memory.
71 class AtomicShaderType
74 AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
76 , m_atomicMemoryType (memoryType)
78 // Shared global memory can only be set to true with compute shaders.
79 DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE);
// Accessors for the stage and the memory type the case operates on.
82 glu::ShaderType getType (void) const { return m_type; }
83 AtomicMemoryType getMemoryType (void) const { return m_atomicMemoryType; }
86 glu::ShaderType m_type;
87 AtomicMemoryType m_atomicMemoryType;
// RAII wrapper owning a host-visible VkBuffer together with its bound memory allocation.
94 Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);
96 VkBuffer getBuffer (void) const { return *m_buffer; }
// Host-mapped pointer to the allocation backing the buffer.
97 void* getHostPtr (void) const { return m_allocation->getHostPtr(); }
// Makes device writes visible through the host mapping (definition below).
99 void invalidate (void);
102 const DeviceInterface& m_vkd;
103 const VkDevice m_device;
104 const VkQueue m_queue;
105 const deUint32 m_queueIndex;
106 const Unique<VkBuffer> m_buffer;
107 const UniquePtr<Allocation> m_allocation;
110 typedef de::SharedPtr<Buffer> BufferSp;
// Creates a VkBuffer of the given size and usage with exclusive sharing mode.
112 Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
114 const VkBufferCreateInfo createInfo =
116 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
118 (VkBufferCreateFlags)0,
121 VK_SHARING_MODE_EXCLUSIVE,
125 return createBuffer(vkd, device, &createInfo);
// Allocates host-visible memory for the buffer and binds it. When useRef is true the
// allocation additionally requests the DeviceAddress requirement so the buffer can be
// reached through a buffer device address.
128 MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
130 const MemoryRequirement allocationType = (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
131 MovePtr<Allocation> alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));
133 VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));
// Creates the buffer and binds host-visible memory to it using the context's
// universal queue family and default allocator.
138 Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
139 : m_vkd (context.getDeviceInterface())
140 , m_device (context.getDevice())
141 , m_queue (context.getUniversalQueue())
142 , m_queueIndex (context.getUniversalQueueFamilyIndex())
143 , m_buffer (createBuffer (context.getDeviceInterface(),
147 , m_allocation (allocateAndBindMemory (context.getDeviceInterface(),
149 context.getDefaultAllocator(),
// Flushes the mapped range so host-side writes become visible to the device.
155 void Buffer::flush (void)
157 flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
// Makes device writes to the buffer visible on the host: submits a
// memory-write -> host-read pipeline barrier, waits for it, then invalidates
// the mapped memory range.
160 void Buffer::invalidate (void)
162 const auto cmdPool = vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
163 const auto cmdBufferPtr = vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
164 const auto cmdBuffer = cmdBufferPtr.get();
165 const auto bufferBarrier = vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);
// Record and submit the barrier, blocking until the queue is idle.
167 beginCommandBuffer(m_vkd, cmdBuffer);
168 m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
169 endCommandBuffer(m_vkd, cmdBuffer);
170 submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);
// Invalidate the host mapping so subsequent host reads see the device data.
172 invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
// Tested atomic operations (enum fragment; full list elided in this view).
179 ATOMIC_OP_EXCHANGE = 0,
// Maps an AtomicOperation to the function name used in the generated shader source.
191 std::string atomicOp2Str (AtomicOperation op)
193 static const char* const s_names[] =
// getSizedArrayElement also asserts the table stays in sync with the enum size.
204 return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
// Maps a DataType to the type name string used in the generated shader source.
224 std::string dataType2Str(DataType type)
226 static const char* const s_names[] =
// getSizedArrayElement also asserts the table stays in sync with the enum size.
235 return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
// Type-erased interface for preparing and verifying the test data buffer,
// implemented per data type by TestBuffer / TestBufferFloatingPoint below.
238 class BufferInterface
// Points the implementation at the host-mapped buffer memory.
241 virtual void setBuffer(void* ptr) = 0;
// Size in bytes of the buffer data structure the test requires.
243 virtual size_t bufferSize() = 0;
// Fills the mapped buffer with initial (partly random) test data.
245 virtual void fillWithTestData(de::Random &rnd) = 0;
// Verifies shader results against expected outcomes, reporting to the collector.
247 virtual void checkResults(tcu::ResultCollector& resultCollector) = 0;
249 virtual ~BufferInterface() {};
// Buffer data management and verification for integer data types.
252 template<typename dataTypeT>
253 class TestBuffer : public BufferInterface
257 TestBuffer(AtomicOperation atomicOp)
258 : m_atomicOp(atomicOp)
// Layout of the buffer shared with the shader (must match the GLSL AtomicStruct).
264 // Use half the number of elements for inout to cause overlap between atomic operations.
265 // Each inout element at index i will have two atomic operations using input from
266 // indices i and i + NUM_ELEMENTS / 2.
267 T inout[NUM_ELEMENTS / 2];
268 T input[NUM_ELEMENTS];
269 T compare[NUM_ELEMENTS];
270 T output[NUM_ELEMENTS];
271 T invocationHitCount[NUM_ELEMENTS];
275 virtual void setBuffer(void* ptr)
277 m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
280 virtual size_t bufferSize()
282 return sizeof(BufferData<dataTypeT>);
285 virtual void fillWithTestData(de::Random &rnd)
// 0xcd fill pattern lets us detect output slots the shader never wrote.
288 deMemset(&pattern, 0xcd, sizeof(dataTypeT));
290 for (int i = 0; i < NUM_ELEMENTS / 2; i++)
292 m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
293 // The first half of compare elements match with every even index.
294 // The second half matches with odd indices. This causes the
295 // overlapping operations to only select one.
296 m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
297 m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
299 for (int i = 0; i < NUM_ELEMENTS; i++)
301 m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
302 m_ptr->output[i] = pattern;
303 m_ptr->invocationHitCount[i] = 0;
307 // Take a copy to be used when calculating expected values.
311 virtual void checkResults(tcu::ResultCollector& resultCollector)
313 checkOperation(m_original, *m_ptr, resultCollector);
// One candidate outcome (final inout value plus both per-operation outputs).
322 Expected (T inout, T output0, T output1)
325 m_output[0] = output0;
326 m_output[1] = output1;
// Bitwise comparison (memcmp) so signed/unsigned reinterpretation is exact.
329 bool compare (T inout, T output0, T output1)
331 return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
332 && deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
333 && deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
337 void checkOperation (const BufferData<dataTypeT>& original,
338 const BufferData<dataTypeT>& result,
339 tcu::ResultCollector& resultCollector);
341 const AtomicOperation m_atomicOp;
343 BufferData<dataTypeT>* m_ptr;
344 BufferData<dataTypeT> m_original;
// Buffer data management and verification for floating-point data types
// (float/double); mirrors TestBuffer but uses epsilon comparisons.
348 template<typename dataTypeT>
349 class TestBufferFloatingPoint : public BufferInterface
353 TestBufferFloatingPoint(AtomicOperation atomicOp)
354 : m_atomicOp(atomicOp)
// Layout of the buffer shared with the shader (must match the GLSL AtomicStruct).
358 struct BufferDataFloatingPoint
360 // Use half the number of elements for inout to cause overlap between atomic operations.
361 // Each inout element at index i will have two atomic operations using input from
362 // indices i and i + NUM_ELEMENTS / 2.
363 T inout[NUM_ELEMENTS / 2];
364 T input[NUM_ELEMENTS];
365 T compare[NUM_ELEMENTS];
366 T output[NUM_ELEMENTS];
367 T invocationHitCount[NUM_ELEMENTS];
371 virtual void setBuffer(void* ptr)
373 m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
376 virtual size_t bufferSize()
378 return sizeof(BufferDataFloatingPoint<dataTypeT>);
381 virtual void fillWithTestData(de::Random& rnd)
// 0xcd fill pattern lets us detect output slots the shader never wrote.
384 deMemset(&pattern, 0xcd, sizeof(dataTypeT));
386 for (int i = 0; i < NUM_ELEMENTS / 2; i++)
388 m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getFloat());
389 // The first half of compare elements match with every even index.
390 // The second half matches with odd indices. This causes the
391 // overlapping operations to only select one.
392 m_ptr->compare[i] = m_ptr->inout[i] + (dataTypeT)(i % 2);
393 m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + (dataTypeT)(1 - (i % 2));
395 for (int i = 0; i < NUM_ELEMENTS; i++)
397 m_ptr->input[i] = static_cast<dataTypeT>(rnd.getFloat());
398 m_ptr->output[i] = pattern;
399 m_ptr->invocationHitCount[i] = 0;
403 // Take a copy to be used when calculating expected values.
407 virtual void checkResults(tcu::ResultCollector& resultCollector)
409 checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
// One candidate outcome (final inout value plus both per-operation outputs).
418 Expected(T inout, T output0, T output1)
421 m_output[0] = output0;
422 m_output[1] = output1;
// Epsilon comparison: floating-point atomics may not be bit-exact.
425 bool compare(T inout, T output0, T output1)
427 T diff1 = static_cast<T>(fabs(m_inout - inout));
428 T diff2 = static_cast<T>(fabs(m_output[0] - output0));
429 T diff3 = static_cast<T>(fabs(m_output[1] - output1));
430 const T epsilon = static_cast<T>(0.00001);
431 return (diff1 < epsilon) && (diff2 < epsilon) && (diff3 < epsilon);
435 void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT>& original,
436 const BufferDataFloatingPoint<dataTypeT>& result,
437 tcu::ResultCollector& resultCollector);
439 const AtomicOperation m_atomicOp;
441 BufferDataFloatingPoint<dataTypeT>* m_ptr;
442 BufferDataFloatingPoint<dataTypeT> m_original;
// Factory: creates the buffer checker matching the data type. Caller owns the
// returned object (wrapped in a UniquePtr at the call site in iterate()).
446 static BufferInterface* createTestBuffer(DataType type, AtomicOperation atomicOp)
450 case DATA_TYPE_INT32:
451 return new TestBuffer<deInt32>(atomicOp);
452 case DATA_TYPE_UINT32:
453 return new TestBuffer<deUint32>(atomicOp);
454 case DATA_TYPE_FLOAT32:
455 return new TestBufferFloatingPoint<float>(atomicOp);
456 case DATA_TYPE_INT64:
457 return new TestBuffer<deInt64>(atomicOp);
458 case DATA_TYPE_UINT64:
459 return new TestBuffer<deUint64>(atomicOp);
460 case DATA_TYPE_FLOAT64:
461 return new TestBufferFloatingPoint<double>(atomicOp);
468 // Use template to handle both signed and unsigned cases. SPIR-V should
469 // have separate operations for both.
471 void TestBuffer<T>::checkOperation (const BufferData<T>& original,
472 const BufferData<T>& result,
473 tcu::ResultCollector& resultCollector)
475 // originalInout = original inout
476 // input0 = input at index i
477 // input1 = input at index i + NUM_ELEMENTS / 2
479 // atomic operation will return the memory contents before
480 // the operation and this is stored as output. Two operations
481 // are executed for each InOut value (using input0 and input1).
483 // Since there is an overlap of two operations per each
484 // InOut element, the outcome of the resulting InOut and
485 // the outputs of the operations have two result candidates
486 // depending on the execution order. Verification passes
487 // if the results match one of these options.
489 for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
491 // Needed when reinterpreting the data as signed values.
492 const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
493 const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
494 const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);
496 // Expected results are collected to this vector.
497 vector<Expected<T> > exp;
// For each commutative op, the two candidates model the two possible
// execution orders of the overlapping operations on the same element.
503 exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
504 exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
510 exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
511 exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
517 exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
518 exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
524 exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
525 exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
531 exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
532 exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
538 exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
539 exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
543 case ATOMIC_OP_EXCHANGE:
// Exchange: final value is whichever input was written last; each output
// is the value observed before that operation's write.
545 exp.push_back(Expected<T>(input1, originalInout, input0));
546 exp.push_back(Expected<T>(input0, input1, originalInout));
550 case ATOMIC_OP_COMP_SWAP:
// Compare-swap: the compare values were set up in fillWithTestData() so that
// exactly one of the two overlapping operations can succeed per element.
552 if (elementNdx % 2 == 0)
554 exp.push_back(Expected<T>(input0, originalInout, input0));
555 exp.push_back(Expected<T>(input0, originalInout, originalInout));
559 exp.push_back(Expected<T>(input1, input1, originalInout));
560 exp.push_back(Expected<T>(input1, originalInout, originalInout));
567 DE_FATAL("Unexpected atomic operation.");
571 const T resIo = result.inout[elementNdx];
572 const T resOutput0 = result.output[elementNdx];
573 const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];
// Pass if the observed triple matches either candidate execution order.
576 if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
578 std::ostringstream errorMessage;
579 errorMessage << "ERROR: Result value check failed at index " << elementNdx
580 << ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
581 << ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
582 << tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
583 << ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
584 << tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
585 << ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
586 << tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
587 << " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";
589 resultCollector.fail(errorMessage.str());
594 // Use template to handle both float and double cases. SPIR-V should
595 // have separate operations for both.
597 void TestBufferFloatingPoint<T>::checkOperationFloatingPoint(const BufferDataFloatingPoint<T>& original,
598 const BufferDataFloatingPoint<T>& result,
599 tcu::ResultCollector& resultCollector)
601 // originalInout = original inout
602 // input0 = input at index i
603 // input1 = input at index i + NUM_ELEMENTS / 2
605 // atomic operation will return the memory contents before
606 // the operation and this is stored as output. Two operations
607 // are executed for each InOut value (using input0 and input1).
609 // Since there is an overlap of two operations per each
610 // InOut element, the outcome of the resulting InOut and
611 // the outputs of the operations have two result candidates
612 // depending on the execution order. Verification passes
613 // if the results match one of these options.
615 for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
617 // Needed when reinterpreting the data as signed values.
618 const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
619 const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
620 const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);
622 // Expected results are collected to this vector.
623 vector<Expected<T> > exp;
// Add: both orders give the same sum; the outputs differ by order.
629 exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
630 exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
634 case ATOMIC_OP_EXCHANGE:
// Exchange: final value is whichever input was written last.
636 exp.push_back(Expected<T>(input1, originalInout, input0));
637 exp.push_back(Expected<T>(input0, input1, originalInout));
642 DE_FATAL("Unexpected atomic operation.");
646 const T resIo = result.inout[elementNdx];
647 const T resOutput0 = result.output[elementNdx];
648 const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];
// Pass if the observed triple matches either candidate execution order
// (compare() uses an epsilon tolerance for floating point).
651 if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
653 std::ostringstream errorMessage;
654 errorMessage << "ERROR: Result value check failed at index " << elementNdx
655 << ". Expected one of the two outcomes: InOut = " << exp[0].m_inout
656 << ", Output0 = " << exp[0].m_output[0] << ", Output1 = "
657 << exp[0].m_output[1] << ", or InOut = " << exp[1].m_inout
658 << ", Output0 = " << exp[1].m_output[0] << ", Output1 = "
659 << exp[1].m_output[1] << ". Got: InOut = " << resIo
660 << ", Output0 = " << resOutput0 << ", Output1 = "
661 << resOutput1 << ". Using Input0 = " << original.input[elementNdx]
662 << " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";
664 resultCollector.fail(errorMessage.str());
// Test instance: executes one atomic-operation case and verifies the results.
669 class AtomicOperationCaseInstance : public TestInstance
672 AtomicOperationCaseInstance (Context& context,
673 const ShaderSpec& shaderSpec,
674 AtomicShaderType shaderType,
676 AtomicOperation atomicOp);
678 virtual tcu::TestStatus iterate (void);
681 const ShaderSpec& m_shaderSpec;
682 AtomicShaderType m_shaderType;
683 const DataType m_dataType;
684 AtomicOperation m_atomicOp;
// Stores the case parameters; all real work happens in iterate().
688 AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context& context,
689 const ShaderSpec& shaderSpec,
690 AtomicShaderType shaderType,
692 AtomicOperation atomicOp)
693 : TestInstance (context)
694 , m_shaderSpec (shaderSpec)
695 , m_shaderType (shaderType)
696 , m_dataType (dataType)
697 , m_atomicOp (atomicOp)
// Runs the case: prepares the test buffer, sets up the descriptor set (either
// the storage buffer itself or, for buffer references, a uniform buffer holding
// the storage buffer's device address), executes the shader and checks results.
701 tcu::TestStatus AtomicOperationCaseInstance::iterate(void)
703 de::UniquePtr<BufferInterface> testBuffer (createTestBuffer(m_dataType, m_atomicOp));
704 tcu::TestLog& log = m_context.getTestContext().getLog();
705 const DeviceInterface& vkd = m_context.getDeviceInterface();
706 const VkDevice device = m_context.getDevice();
// Fixed seed keeps the test data reproducible across runs.
707 de::Random rnd (0x62a15e34);
708 const bool useRef = (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
709 const VkDescriptorType descType = (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
710 const VkBufferUsageFlags usageFlags = (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));
712 // The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
713 // a uniform buffer. If not, it will be passed directly as a descriptor.
714 Buffer buffer (m_context, usageFlags, testBuffer->bufferSize(), useRef);
715 std::unique_ptr<Buffer> auxBuffer;
719 // Pass the main buffer address inside a uniform buffer.
720 const VkBufferDeviceAddressInfo addressInfo =
722 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
723 nullptr, // const void* pNext;
724 buffer.getBuffer(), // VkBuffer buffer;
726 const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);
728 auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
729 deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
// Initialize the test data through the host mapping.
733 testBuffer->setBuffer(buffer.getHostPtr());
734 testBuffer->fillWithTestData(rnd);
// Descriptor set with a single binding for the data (or address) buffer.
738 Move<VkDescriptorSetLayout> extraResourcesLayout;
739 Move<VkDescriptorPool> extraResourcesSetPool;
740 Move<VkDescriptorSet> extraResourcesSet;
742 const VkDescriptorSetLayoutBinding bindings[] =
744 { 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
747 const VkDescriptorSetLayoutCreateInfo layoutInfo =
749 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
751 (VkDescriptorSetLayoutCreateFlags)0u,
752 DE_LENGTH_OF_ARRAY(bindings),
756 extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);
758 const VkDescriptorPoolSize poolSizes[] =
763 const VkDescriptorPoolCreateInfo poolInfo =
765 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
767 (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
769 DE_LENGTH_OF_ARRAY(poolSizes),
773 extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);
775 const VkDescriptorSetAllocateInfo allocInfo =
777 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
779 *extraResourcesSetPool,
781 &extraResourcesLayout.get()
784 extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);
// Bind either the uniform buffer with the address (useRef) or the data buffer itself.
786 VkDescriptorBufferInfo bufferInfo;
787 bufferInfo.buffer = (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
788 bufferInfo.offset = 0u;
789 bufferInfo.range = VK_WHOLE_SIZE;
791 const VkWriteDescriptorSet descriptorWrite =
793 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
797 0u, // dstArrayElement
800 (const VkDescriptorImageInfo*)DE_NULL,
802 (const VkBufferView*)DE_NULL
805 vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);
807 // Storage for output varying data.
808 std::vector<deUint32> outputs (NUM_ELEMENTS);
809 std::vector<void*> outputPtr (NUM_ELEMENTS);
811 for (size_t i = 0; i < NUM_ELEMENTS; i++)
813 outputs[i] = 0xcdcdcdcd;
814 outputPtr[i] = &outputs[i];
// With shared memory a single workgroup is used (local invocations cover the
// elements, see createShaderSpec); otherwise one invocation per element.
817 const int numWorkGroups = ((m_shaderType.getMemoryType() == AtomicMemoryType::SHARED) ? 1 : static_cast<int>(NUM_ELEMENTS));
818 UniquePtr<ShaderExecutor> executor (createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));
820 executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
823 tcu::ResultCollector resultCollector(log);
825 // Check the results of the atomic operation
826 testBuffer->checkResults(resultCollector);
828 return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
// Test case: owns the generated shader spec and case parameters, performs the
// feature/support checks, and creates AtomicOperationCaseInstance to run.
831 class AtomicOperationCase : public TestCase
834 AtomicOperationCase (tcu::TestContext& testCtx,
836 const char* description,
837 AtomicShaderType type,
839 AtomicOperation atomicOp);
840 virtual ~AtomicOperationCase (void);
842 virtual TestInstance* createInstance (Context& ctx) const;
843 virtual void checkSupport (Context& ctx) const;
844 virtual void initPrograms (vk::SourceCollections& programCollection) const
846 generateSources(m_shaderType.getType(), m_shaderSpec, programCollection);
// Builds m_shaderSpec from the case parameters (see definition below).
851 void createShaderSpec();
852 ShaderSpec m_shaderSpec;
853 const AtomicShaderType m_shaderType;
854 const DataType m_dataType;
855 const AtomicOperation m_atomicOp;
// Stores the case parameters; the shader spec is generated separately.
858 AtomicOperationCase::AtomicOperationCase (tcu::TestContext& testCtx,
860 const char* description,
861 AtomicShaderType shaderType,
863 AtomicOperation atomicOp)
864 : TestCase (testCtx, name, description)
865 , m_shaderType (shaderType)
866 , m_dataType (dataType)
867 , m_atomicOp (atomicOp)
873 AtomicOperationCase::~AtomicOperationCase (void)
// Creates the runnable instance carrying this case's parameters.
877 TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
879 return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
// Throws NotSupportedError unless the device supports the features this case
// needs: 64-bit integer atomics, float atomics (add/exchange, buffer/shared),
// buffer device address for the REFERENCE memory type, and per-stage
// stores-and-atomics support.
882 void AtomicOperationCase::checkSupport (Context& ctx) const
884 if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
886 ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");
888 const auto atomicInt64Features = ctx.getShaderAtomicInt64Features();
889 const bool isSharedMemory = (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED);
// Buffer and shared-memory 64-bit atomics are separate feature bits.
891 if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
893 TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers")
895 if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
897 TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
901 if (m_dataType == DATA_TYPE_FLOAT32)
903 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
// Float add and exchange are gated by distinct feature bits, each split
// again by buffer vs. shared memory.
904 if (m_atomicOp == ATOMIC_OP_ADD)
906 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
908 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
910 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
915 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
917 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
921 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
923 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
925 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
927 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
932 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
934 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
940 if (m_dataType == DATA_TYPE_FLOAT64)
942 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
943 if (m_atomicOp == ATOMIC_OP_ADD)
945 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
947 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
949 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
954 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
956 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
960 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
962 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
964 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
966 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
971 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
973 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
979 if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
981 ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
984 // Check stores and atomic operation support.
985 switch (m_shaderType.getType())
987 case glu::SHADERTYPE_VERTEX:
988 case glu::SHADERTYPE_TESSELLATION_CONTROL:
989 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
990 case glu::SHADERTYPE_GEOMETRY:
991 if (!ctx.getDeviceFeatures().vertexPipelineStoresAndAtomics)
992 TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in Vertex, Tessellation, and Geometry shader.");
994 case glu::SHADERTYPE_FRAGMENT:
995 if (!ctx.getDeviceFeatures().fragmentStoresAndAtomics)
996 TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in fragment shader.");
998 case glu::SHADERTYPE_COMPUTE:
1001 DE_FATAL("Unsupported shader type");
1004 checkSupportShader(ctx, m_shaderType.getType());
// Generates m_shaderSpec: the GLSL global declarations and shader body for this
// case, specialized by data type, atomic operation, memory type and stage.
1007 void AtomicOperationCase::createShaderSpec (void)
1009 const AtomicMemoryType memoryType = m_shaderType.getMemoryType();
1011 // Global declarations.
1012 std::ostringstream shaderTemplateGlobalStream;
1014 // Structure in use for atomic operations.
1015 shaderTemplateGlobalStream
1016 << "${EXTENSIONS}\n"
1018 << "struct AtomicStruct\n"
1020 << " ${DATATYPE} inoutValues[${N}/2];\n"
1021 << " ${DATATYPE} inputValues[${N}];\n"
1022 << " ${DATATYPE} compareValues[${N}];\n"
1023 << " ${DATATYPE} outputValues[${N}];\n"
1024 << " int invocationHitCount[${N}];\n"
1030 // The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
1031 // as "buf.data", which is the name used in the atomic operation statements.
1033 // * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
1034 // * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
1035 // * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
1037 if (memoryType != AtomicMemoryType::REFERENCE)
1039 shaderTemplateGlobalStream
1040 << "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
1041 << " AtomicStruct data;\n"
1042 << "} ${RESULT_BUFFER_NAME};\n"
1046 // When using global shared memory in the compute variant, invocations will use a shared global structure instead of a
1047 // descriptor set as the sources and results of each tested operation.
1048 if (memoryType == AtomicMemoryType::SHARED)
1050 shaderTemplateGlobalStream
1051 << "shared struct { AtomicStruct data; } buf;\n"
// REFERENCE case: declare a buffer_reference type and a uniform buffer holding it.
1058 shaderTemplateGlobalStream
1059 << "layout (buffer_reference) buffer AtomicBuffer {\n"
1060 << " AtomicStruct data;\n"
1063 << "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
1064 << " AtomicBuffer buf;\n"
1070 const auto shaderTemplateGlobalString = shaderTemplateGlobalStream.str();
1071 const tcu::StringTemplate shaderTemplateGlobal (shaderTemplateGlobalString);
1073 // Shader body for the non-vertex case.
1074 std::ostringstream nonVertexShaderTemplateStream;
1076 if (memoryType == AtomicMemoryType::SHARED)
1078 // Invocation zero will initialize the shared structure from the descriptor set.
1079 nonVertexShaderTemplateStream
1080 << "if (gl_LocalInvocationIndex == 0u)\n"
1082 << " buf.data = ${RESULT_BUFFER_NAME}.data;\n"
1088 if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
// Fragment: skip helper invocations so only real fragments run the atomic op.
1090 nonVertexShaderTemplateStream
1091 << "if (!gl_HelperInvocation) {\n"
1092 << " int idx = atomicAdd(buf.data.index, 1);\n"
1093 << " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
// Other stages: cap the number of participating invocations at ${N}.
1099 nonVertexShaderTemplateStream
1100 << "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
1102 << " int idx = atomicAdd(buf.data.index, 1);\n"
1103 << " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1108 if (memoryType == AtomicMemoryType::SHARED)
1110 // Invocation zero will copy results back to the descriptor set.
1111 nonVertexShaderTemplateStream
1113 << "if (gl_LocalInvocationIndex == 0u)\n"
1115 << " ${RESULT_BUFFER_NAME}.data = buf.data;\n"
1120 const auto nonVertexShaderTemplateStreamStr = nonVertexShaderTemplateStream.str();
1121 const tcu::StringTemplate nonVertexShaderTemplateSrc (nonVertexShaderTemplateStreamStr);
1123 // Shader body for the vertex case.
1124 const tcu::StringTemplate vertexShaderTemplateSrc(
1125 "int idx = gl_VertexIndex;\n"
1126 "if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
1128 " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
// Extension directives required by the chosen data type and memory type.
1132 std::ostringstream extensions;
1134 if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1137 << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
1138 << "#extension GL_EXT_shader_atomic_int64 : enable\n"
1141 else if ((m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
1144 << "#extension GL_EXT_shader_atomic_float : enable\n"
1145 << "#extension GL_KHR_memory_scope_semantics : enable\n"
1149 if (memoryType == AtomicMemoryType::REFERENCE)
1151 extensions << "#extension GL_EXT_buffer_reference : require\n";
// Template parameters shared by the global declarations and shader bodies.
1155 std::map<std::string, std::string> specializations;
1157 specializations["EXTENSIONS"] = extensions.str();
1158 specializations["DATATYPE"] = dataType2Str(m_dataType);
1159 specializations["ATOMICOP"] = atomicOp2Str(m_atomicOp);
1160 specializations["SETIDX"] = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
1161 specializations["N"] = de::toString((int)NUM_ELEMENTS);
// Compare-swap takes an extra compare argument; empty for all other ops.
1162 specializations["COMPARE_ARG"] = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
1163 specializations["RESULT_BUFFER_NAME"] = ((memoryType == AtomicMemoryType::SHARED) ? "result" : "buf");
1166 m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1167 m_shaderSpec.glslVersion = glu::GLSL_VERSION_450;
1168 m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
1169 m_shaderSpec.source = ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
1170 ? vertexShaderTemplateSrc.specialize(specializations)
1171 : nonVertexShaderTemplateSrc.specialize(specializations));
1173 if (memoryType == AtomicMemoryType::SHARED)
1175 // When using global shared memory, use a single workgroup and an appropriate number of local invocations.
1176 m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
// Populates the given group with one AtomicOperationCase per valid combination of
// (atomic op) x (data type) x (shader stage) x (memory type). Invalid combinations
// (non-ADD/EXCHANGE ops on floats, shared memory outside compute) are filtered below.
1180 void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
1182 	tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();
// Shader stages under test; "name" is appended to each test-case name.
1186 		glu::ShaderType type;
1190 		{ glu::SHADERTYPE_VERTEX,					"vertex" },
1191 		{ glu::SHADERTYPE_FRAGMENT,				"fragment" },
1192 		{ glu::SHADERTYPE_GEOMETRY,				"geometry" },
1193 		{ glu::SHADERTYPE_TESSELLATION_CONTROL,	"tess_ctrl" },
1194 		{ glu::SHADERTYPE_TESSELLATION_EVALUATION,	"tess_eval" },
1195 		{ glu::SHADERTYPE_COMPUTE,					"compute" },
// Memory backing the atomic variable (see AtomicMemoryType); "suffix" is appended to
// the test name (empty for the plain-buffer default case).
1200 		AtomicMemoryType type;
1204 		{ AtomicMemoryType::BUFFER,		"" },
1205 		{ AtomicMemoryType::SHARED,		"_shared" },
1206 		{ AtomicMemoryType::REFERENCE,	"_reference" },
// Data types operated on atomically, with per-type name and description strings.
1213 		const char* description;
1216 		{ DATA_TYPE_INT32,	"signed",			"Tests using signed data (int)"				},
1217 		{ DATA_TYPE_UINT32,	"unsigned",		"Tests using unsigned data (uint)"			},
1218 		{ DATA_TYPE_FLOAT32,"float32",			"Tests using 32-bit float data"				},
1219 		{ DATA_TYPE_INT64,	"signed64bit",		"Tests using 64 bit signed data (int64)"	},
1220 		{ DATA_TYPE_UINT64,	"unsigned64bit",	"Tests using 64 bit unsigned data (uint64)"	},
// NOTE(review): stray ')' at the end of the float64 description string below — looks
// like a typo; fixing it changes a runtime string, so flagging only.
1221 		{ DATA_TYPE_FLOAT64,"float64",			"Tests using 64-bit float data)"			}
// Atomic operations under test; "name" is the leading component of the test name.
1226 		AtomicOperation	value;
1230 		{ ATOMIC_OP_EXCHANGE,	"exchange"	},
1231 		{ ATOMIC_OP_COMP_SWAP,	"comp_swap"	},
1232 		{ ATOMIC_OP_ADD,		"add"		},
1233 		{ ATOMIC_OP_MIN,		"min"		},
1234 		{ ATOMIC_OP_MAX,		"max"		},
1235 		{ ATOMIC_OP_AND,		"and"		},
1236 		{ ATOMIC_OP_OR,		"or"		},
1237 		{ ATOMIC_OP_XOR,		"xor"		}
// Full cartesian product of the four tables above, minus the filtered combinations.
1240 	for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
1242 		for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
1244 			for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
1246 				// Only ADD and EXCHANGE are supported on floating-point
1247 				if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
1249 					if (atomicOp[opNdx].value != ATOMIC_OP_ADD && atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
1255 				for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
1257 					// Shared memory only available in compute shaders.
1258 					if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE)
// Test name pattern: <op>_<datatype>_<stage>[<memory-suffix>], e.g. "add_signed_compute_shared".
1261 					const std::string description	= std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
1262 					const std::string name			= std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;
// Group takes ownership of the newly added case.
1264 					atomicOperationTestsGroup->addChild(new AtomicOperationCase(testCtx, name.c_str(), description.c_str(), AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type), dataSign[signNdx].dataType, atomicOp[opNdx].value));
1273 tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
1275 return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);