1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2015 The Khronos Group Inc.
6 * Copyright (c) 2017 Google Inc.
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * \brief Atomic operations (OpAtomic*) tests.
23 *//*--------------------------------------------------------------------*/
25 #include "vktAtomicOperationTests.hpp"
26 #include "vktShaderExecutor.hpp"
28 #include "vkRefUtil.hpp"
29 #include "vkMemUtil.hpp"
30 #include "vkQueryUtil.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vktTestGroupUtil.hpp"
36 #include "tcuTestLog.hpp"
37 #include "tcuStringTemplate.hpp"
38 #include "tcuResultCollector.hpp"
40 #include "deFloat16.h"
42 #include "deStringUtil.hpp"
43 #include "deSharedPtr.hpp"
44 #include "deRandom.hpp"
45 #include "deArrayUtil.hpp"
53 namespace shaderexecutor
// Memory backing used by an atomic test case: a plain storage buffer,
// workgroup-shared memory (compute only), or a buffer accessed through
// a buffer device address reference.
65 enum class AtomicMemoryType
67 BUFFER = 0, // Normal buffer.
68 SHARED, // Shared global struct in a compute workgroup.
69 REFERENCE, // Buffer passed as a reference.
72 // Helper struct to indicate the shader type and if it should use shared global memory.
73 class AtomicShaderType
// Constructor: SHARED memory is asserted to be combined only with compute shaders.
76 AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
78 , m_atomicMemoryType (memoryType)
80 // Shared global memory can only be set to true with compute shaders.
81 DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE);
// Accessors for the stored shader stage and memory backing.
84 glu::ShaderType getType (void) const { return m_type; }
85 AtomicMemoryType getMemoryType (void) const { return m_atomicMemoryType; }
88 glu::ShaderType m_type;
89 AtomicMemoryType m_atomicMemoryType;
// RAII wrapper for a host-visible Vulkan buffer plus its bound memory.
// Created via the universal queue's device; see constructor below.
96 Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);
98 VkBuffer getBuffer (void) const { return *m_buffer; }
99 void* getHostPtr (void) const { return m_allocation->getHostPtr(); }
// invalidate() also submits a device-write -> host-read barrier; see definition below.
101 void invalidate (void);
104 const DeviceInterface& m_vkd;
105 const VkDevice m_device;
106 const VkQueue m_queue;
107 const deUint32 m_queueIndex;
108 const Unique<VkBuffer> m_buffer;
109 const UniquePtr<Allocation> m_allocation;
// Shared-ownership handle used when buffers are stored in containers.
112 typedef de::SharedPtr<Buffer> BufferSp;
// Creates a VkBuffer of the given size/usage with exclusive sharing mode.
// (Listing is truncated: the size/usage/queue-family fields of createInfo
// are not visible here.)
114 Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
116 const VkBufferCreateInfo createInfo =
118 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
120 (VkBufferCreateFlags)0,
123 VK_SHARING_MODE_EXCLUSIVE,
127 return createBuffer(vkd, device, &createInfo);
// Allocates host-visible memory for the buffer — additionally requiring
// DeviceAddress support when the buffer will be accessed via a buffer
// reference (useRef) — then binds it to the buffer.
130 MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
132 const MemoryRequirement allocationType = (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
133 MovePtr<Allocation> alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));
135 VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));
// Creates the buffer and allocates/binds its memory from the context's
// default allocator, using the universal queue and its family index.
140 Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
141 : m_vkd (context.getDeviceInterface())
142 , m_device (context.getDevice())
143 , m_queue (context.getUniversalQueue())
144 , m_queueIndex (context.getUniversalQueueFamilyIndex())
145 , m_buffer (createBuffer (context.getDeviceInterface(),
149 , m_allocation (allocateAndBindMemory (context.getDeviceInterface(),
151 context.getDefaultAllocator(),
// Flushes the entire mapped range so host writes become visible to the device.
157 void Buffer::flush (void)
159 flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
// Makes device writes visible to the host: records and submits a one-off
// command buffer with a MEMORY_WRITE -> HOST_READ buffer barrier, waits
// for completion, then invalidates the mapped memory range.
162 void Buffer::invalidate (void)
164 const auto cmdPool = vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
165 const auto cmdBufferPtr = vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
166 const auto cmdBuffer = cmdBufferPtr.get();
167 const auto bufferBarrier = vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);
169 beginCommandBuffer(m_vkd, cmdBuffer);
170 m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
171 endCommandBuffer(m_vkd, cmdBuffer);
172 submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);
174 invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
// AtomicOperation enumerators (listing truncated; other ops such as the
// ones handled in checkOperation below — add/and/or/xor/min/max/comp_swap —
// are not visible here).
181 ATOMIC_OP_EXCHANGE = 0,
// Maps an AtomicOperation to its shader-source name via a sized lookup table
// (getSizedArrayElement statically checks the table covers ATOMIC_OP_LAST entries).
193 std::string atomicOp2Str (AtomicOperation op)
195 static const char* const s_names[] =
206 return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
// DataType enumerators (listing truncated; int32/uint32/float32/int64/
// uint64/float64 cases appear in createTestBuffer below).
216 DATA_TYPE_FLOAT16 = 0,
// Maps a DataType to its shader type-name string via a sized lookup table.
227 std::string dataType2Str(DataType type)
229 static const char* const s_names[] =
239 return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
// Abstract interface over the typed test-data buffers: binds the host
// pointer, reports the required size, fills inputs, and verifies results.
242 class BufferInterface
245 virtual void setBuffer(void* ptr) = 0;
247 virtual size_t bufferSize() = 0;
249 virtual void fillWithTestData(de::Random &rnd) = 0;
251 virtual void checkResults(tcu::ResultCollector& resultCollector) = 0;
253 virtual ~BufferInterface() {};
// Integer-type test buffer. Lays out the in/out data (BufferData), fills it
// with random inputs, keeps a copy of the original contents, and compares
// shader results against both possible execution orders (checkOperation).
256 template<typename dataTypeT>
257 class TestBuffer : public BufferInterface
261 TestBuffer(AtomicOperation atomicOp)
262 : m_atomicOp(atomicOp)
268 // Use half the number of elements for inout to cause overlap between atomic operations.
269 // Each inout element at index i will have two atomic operations using input from
270 // indices i and i + NUM_ELEMENTS / 2.
271 T inout[NUM_ELEMENTS / 2];
272 T input[NUM_ELEMENTS];
273 T compare[NUM_ELEMENTS];
274 T output[NUM_ELEMENTS];
275 T invocationHitCount[NUM_ELEMENTS];
// Bind the mapped host pointer of the Vulkan buffer as typed test data.
279 virtual void setBuffer(void* ptr)
281 m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
284 virtual size_t bufferSize()
286 return sizeof(BufferData<dataTypeT>);
// Fill inputs with random 64-bit values; outputs are pre-filled with the
// 0xcd byte pattern so untouched elements are detectable.
289 virtual void fillWithTestData(de::Random &rnd)
292 deMemset(&pattern, 0xcd, sizeof(dataTypeT));
294 for (int i = 0; i < NUM_ELEMENTS / 2; i++)
296 m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
297 // The first half of compare elements match with every even index.
298 // The second half matches with odd indices. This causes the
299 // overlapping operations to only select one.
300 m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
301 m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
303 for (int i = 0; i < NUM_ELEMENTS; i++)
305 m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
306 m_ptr->output[i] = pattern;
307 m_ptr->invocationHitCount[i] = 0;
311 // Take a copy to be used when calculating expected values.
315 virtual void checkResults(tcu::ResultCollector& resultCollector)
317 checkOperation(m_original, *m_ptr, resultCollector);
// Expected holds one candidate outcome (final inout + the two returned
// "old" values); compare() uses bitwise memcmp, i.e. exact equality.
326 Expected (T inout, T output0, T output1)
329 m_output[0] = output0;
330 m_output[1] = output1;
333 bool compare (T inout, T output0, T output1)
335 return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
336 && deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
337 && deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
341 void checkOperation (const BufferData<dataTypeT>& original,
342 const BufferData<dataTypeT>& result,
343 tcu::ResultCollector& resultCollector);
345 const AtomicOperation m_atomicOp;
347 BufferData<dataTypeT>* m_ptr;
348 BufferData<dataTypeT> m_original;
// Float comparison: any two NaNs compare equal, NaN vs non-NaN compares
// unequal, otherwise values match within an absolute tolerance of 1e-5.
353 bool nanSafeSloppyEquals(T x, T y)
355 if (deIsIEEENaN(x) && deIsIEEENaN(y))
358 if (deIsIEEENaN(x) || deIsIEEENaN(y))
361 return fabs(deToDouble(x) - deToDouble(y)) < 0.00001;
// Floating-point test buffer. Same layout idea as TestBuffer, but seeds
// NaN and signed-zero special cases and compares results with the
// NaN-tolerant nanSafeSloppyEquals instead of exact memcmp.
364 template<typename dataTypeT>
365 class TestBufferFloatingPoint : public BufferInterface
369 TestBufferFloatingPoint(AtomicOperation atomicOp)
370 : m_atomicOp(atomicOp)
374 struct BufferDataFloatingPoint
376 // Use half the number of elements for inout to cause overlap between atomic operations.
377 // Each inout element at index i will have two atomic operations using input from
378 // indices i and i + NUM_ELEMENTS / 2.
379 T inout[NUM_ELEMENTS / 2];
380 T input[NUM_ELEMENTS];
381 T compare[NUM_ELEMENTS];
382 T output[NUM_ELEMENTS];
383 deInt32 invocationHitCount[NUM_ELEMENTS];
387 virtual void setBuffer(void* ptr)
389 m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
392 virtual size_t bufferSize()
394 return sizeof(BufferDataFloatingPoint<dataTypeT>);
// Random floats for inputs; selected elements are overwritten with
// signaling/quiet NaNs and +/-0 to exercise exceptional min/max behavior.
397 virtual void fillWithTestData(de::Random& rnd)
400 deMemset(&pattern, 0xcd, sizeof(dataTypeT));
402 for (int i = 0; i < NUM_ELEMENTS / 2; i++)
404 m_ptr->inout[i] = deToFloatType<dataTypeT>(rnd.getFloat());
405 // These aren't used by any of the float tests
406 m_ptr->compare[i] = deToFloatType<dataTypeT>(0.0);
408 // Add special cases for NaN and +/-0
410 m_ptr->inout[0] = deSignalingNaN<dataTypeT>();
412 m_ptr->input[1 * 2 + 0] = deSignalingNaN<dataTypeT>();
414 m_ptr->inout[2] = deQuietNaN<dataTypeT>();
416 m_ptr->input[3 * 2 + 0] = deQuietNaN<dataTypeT>();
418 m_ptr->inout[4] = deSignalingNaN<dataTypeT>();
419 m_ptr->input[4 * 2 + 0] = deQuietNaN<dataTypeT>();
420 m_ptr->input[4 * 2 + 1] = deQuietNaN<dataTypeT>();
422 m_ptr->inout[5] = deToFloatType<dataTypeT>(-0.0);
423 m_ptr->input[5 * 2 + 0] = deToFloatType<dataTypeT>(0.0);
424 m_ptr->input[5 * 2 + 1] = deToFloatType<dataTypeT>(0.0);
426 for (int i = 0; i < NUM_ELEMENTS; i++)
428 m_ptr->input[i] = deToFloatType<dataTypeT>(rnd.getFloat());
429 m_ptr->output[i] = pattern;
430 m_ptr->invocationHitCount[i] = 0;
435 // Take a copy to be used when calculating expected values.
439 virtual void checkResults(tcu::ResultCollector& resultCollector)
441 checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
// Candidate outcome; comparison is NaN-aware and tolerance-based.
450 Expected(T inout, T output0, T output1)
453 m_output[0] = output0;
454 m_output[1] = output1;
457 bool compare(T inout, T output0, T output1)
459 return nanSafeSloppyEquals(m_inout, inout) &&
460 nanSafeSloppyEquals(m_output[0], output0) &&
461 nanSafeSloppyEquals(m_output[1], output1);
465 void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT>& original,
466 const BufferDataFloatingPoint<dataTypeT>& result,
467 tcu::ResultCollector& resultCollector);
469 const AtomicOperation m_atomicOp;
471 BufferDataFloatingPoint<dataTypeT>* m_ptr;
472 BufferDataFloatingPoint<dataTypeT> m_original;
// Factory: returns a heap-allocated, caller-owned test buffer of the
// matching template specialization for the given data type. Integer types
// use TestBuffer; float types use TestBufferFloatingPoint.
476 static BufferInterface* createTestBuffer(DataType type, AtomicOperation atomicOp)
480 case DATA_TYPE_FLOAT16:
481 return new TestBufferFloatingPoint<deFloat16>(atomicOp);
482 case DATA_TYPE_INT32:
483 return new TestBuffer<deInt32>(atomicOp);
484 case DATA_TYPE_UINT32:
485 return new TestBuffer<deUint32>(atomicOp);
486 case DATA_TYPE_FLOAT32:
487 return new TestBufferFloatingPoint<float>(atomicOp);
488 case DATA_TYPE_INT64:
489 return new TestBuffer<deInt64>(atomicOp);
490 case DATA_TYPE_UINT64:
491 return new TestBuffer<deUint64>(atomicOp);
492 case DATA_TYPE_FLOAT64:
493 return new TestBufferFloatingPoint<double>(atomicOp);
500 // Use template to handle both signed and unsigned cases. SPIR-V should
501 // have separate operations for both.
503 void TestBuffer<T>::checkOperation (const BufferData<T>& original,
504 const BufferData<T>& result,
505 tcu::ResultCollector& resultCollector)
507 // originalInout = original inout
508 // input0 = input at index i
509 // iinput1 = input at index i + NUM_ELEMENTS / 2
511 // atomic operation will return the memory contents before
512 // the operation and this is stored as output. Two operations
513 // are executed for each InOut value (using input0 and input1).
515 // Since there is an overlap of two operations per each
516 // InOut element, the outcome of the resulting InOut and
517 // the outputs of the operations have two result candidates
518 // depending on the execution order. Verification passes
519 // if the results match one of these options.
521 for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
523 // Needed when reinterpeting the data as signed values.
524 const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
525 const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
526 const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);
528 // Expected results are collected to this vector.
529 vector<Expected<T> > exp;
// For each commutative op: two candidates, one per possible execution
// order (op(op(inout, input0), input1) vs op(op(inout, input1), input0)).
535 exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
536 exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
542 exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
543 exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
549 exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
550 exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
556 exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
557 exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
563 exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
564 exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
570 exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
571 exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
// Exchange: final value is whichever input was written last.
575 case ATOMIC_OP_EXCHANGE:
577 exp.push_back(Expected<T>(input1, originalInout, input0));
578 exp.push_back(Expected<T>(input0, input1, originalInout));
// Compare-swap: compare[] was seeded (fillWithTestData) so that exactly
// one of the two overlapping swaps succeeds per element.
582 case ATOMIC_OP_COMP_SWAP:
584 if (elementNdx % 2 == 0)
586 exp.push_back(Expected<T>(input0, originalInout, input0));
587 exp.push_back(Expected<T>(input0, originalInout, originalInout));
591 exp.push_back(Expected<T>(input1, input1, originalInout));
592 exp.push_back(Expected<T>(input1, originalInout, originalInout));
599 DE_FATAL("Unexpected atomic operation.");
603 const T resIo = result.inout[elementNdx];
604 const T resOutput0 = result.output[elementNdx];
605 const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];
// Pass if either execution-order candidate matches; otherwise log all
// values (in hex) and record a failure.
608 if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
610 std::ostringstream errorMessage;
611 errorMessage << "ERROR: Result value check failed at index " << elementNdx
612 << ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
613 << ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
614 << tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
615 << ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
616 << tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
617 << ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
618 << tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
619 << " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";
621 resultCollector.fail(errorMessage.str());
// Appends to 'values' every result the spec allows for min/max when the
// operands involve NaNs or signed zeros. Signaling NaNs may be returned
// as-is or quieted; quiet NaN vs non-NaN may yield either operand; and
// min/max of +0 and -0 may return either zero.
627 void handleExceptionalFloatMinMaxValues(vector<T> &values, T x, T y)
630 if (deIsSignalingNaN(x) && deIsSignalingNaN(y))
632 values.push_back(deQuietNaN<T>());
633 values.push_back(deSignalingNaN<T>());
635 else if (deIsSignalingNaN(x))
637 values.push_back(deQuietNaN<T>());
638 values.push_back(deSignalingNaN<T>());
642 else if (deIsSignalingNaN(y))
644 values.push_back(deQuietNaN<T>());
645 values.push_back(deSignalingNaN<T>());
649 else if (deIsIEEENaN(x) && deIsIEEENaN(y))
652 values.push_back(deQuietNaN<T>());
654 else if (deIsIEEENaN(x))
656 // One quiet NaN and one non-NaN.
659 else if (deIsIEEENaN(y))
661 // One quiet NaN and one non-NaN.
664 else if ((deIsPositiveZero(x) && deIsNegativeZero(y)) || (deIsNegativeZero(x) && deIsPositiveZero(y)))
666 values.push_back(deToFloatType<T>(0.0));
667 values.push_back(deToFloatType<T>(-0.0));
// Reference float addition (body fragment — the signature line is missing
// from this listing): any NaN operand yields quiet NaN, otherwise the sum
// is computed in double precision and converted back to T.
674 if (deIsIEEENaN(x) || deIsIEEENaN(y))
675 return deQuietNaN<T>();
676 return deToFloatType<T>(deToDouble(x) + deToDouble(y));
// All acceptable results of atomic float min(x, y): exceptional NaN /
// signed-zero outcomes plus the ordinary smaller operand.
680 vector<T> floatMinValues(T x, T y)
683 handleExceptionalFloatMinMaxValues(values, x, y);
686 values.push_back(deToDouble(x) < deToDouble(y) ? x : y);
// All acceptable results of atomic float max(x, y): exceptional NaN /
// signed-zero outcomes plus the ordinary larger operand.
692 vector<T> floatMaxValues(T x, T y)
695 handleExceptionalFloatMinMaxValues(values, x, y);
698 values.push_back(deToDouble(x) > deToDouble(y) ? x : y);
703 // Use template to handle both float and double cases. SPIR-V should
704 // have separate operations for both.
706 void TestBufferFloatingPoint<T>::checkOperationFloatingPoint(const BufferDataFloatingPoint<T>& original,
707 const BufferDataFloatingPoint<T>& result,
708 tcu::ResultCollector& resultCollector)
710 // originalInout = original inout
711 // input0 = input at index i
712 // iinput1 = input at index i + NUM_ELEMENTS / 2
714 // atomic operation will return the memory contents before
715 // the operation and this is stored as output. Two operations
716 // are executed for each InOut value (using input0 and input1).
718 // Since there is an overlap of two operations per each
719 // InOut element, the outcome of the resulting InOut and
720 // the outputs of the operations have two result candidates
721 // depending on the execution order. Verification passes
722 // if the results match one of these options.
724 for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
726 // Needed when reinterpeting the data as signed values.
727 const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
728 const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
729 const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);
731 // Expected results are collected to this vector.
732 vector<Expected<T> > exp;
// ADD: two candidates, one per execution order, built with the
// NaN-aware reference floatAdd.
738 exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), originalInout, floatAdd(originalInout, input0)));
739 exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), floatAdd(originalInout, input1), originalInout));
// MIN: cross-product of every permitted intermediate and final value
// (NaN / signed-zero rules give several candidates per step).
745 // The case where input0 is combined first
746 vector<T> minOriginalAndInput0 = floatMinValues(originalInout, input0);
747 for (T x : minOriginalAndInput0)
749 vector<T> minAll = floatMinValues(x, input1);
752 exp.push_back(Expected<T>(y, originalInout, x));
756 // The case where input1 is combined first
757 vector<T> minOriginalAndInput1 = floatMinValues(originalInout, input1);
758 for (T x : minOriginalAndInput1)
760 vector<T> minAll = floatMinValues(x, input0);
763 exp.push_back(Expected<T>(y, x, originalInout));
// MAX: same cross-product construction using floatMaxValues.
771 // The case where input0 is combined first
772 vector<T> minOriginalAndInput0 = floatMaxValues(originalInout, input0);
773 for (T x : minOriginalAndInput0)
775 vector<T> minAll = floatMaxValues(x, input1);
778 exp.push_back(Expected<T>(y, originalInout, x));
782 // The case where input1 is combined first
783 vector<T> minOriginalAndInput1 = floatMaxValues(originalInout, input1);
784 for (T x : minOriginalAndInput1)
786 vector<T> minAll = floatMaxValues(x, input0);
789 exp.push_back(Expected<T>(y, x, originalInout));
795 case ATOMIC_OP_EXCHANGE:
797 exp.push_back(Expected<T>(input1, originalInout, input0));
798 exp.push_back(Expected<T>(input0, input1, originalInout));
803 DE_FATAL("Unexpected atomic operation.");
807 const T resIo = result.inout[elementNdx];
808 const T resOutput0 = result.output[elementNdx];
809 const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];
// Pass if any candidate matches; otherwise list every candidate in the
// failure message.
812 bool hasMatch = false;
813 for (Expected<T> e : exp)
815 if (e.compare(resIo, resOutput0, resOutput1))
823 std::ostringstream errorMessage;
824 errorMessage << "ERROR: Result value check failed at index " << elementNdx
825 << ". Expected one of the outcomes:";
828 for (Expected<T> e : exp)
831 errorMessage << ", or";
834 errorMessage << " InOut = " << e.m_inout
835 << ", Output0 = " << e.m_output[0]
836 << ", Output1 = " << e.m_output[1];
839 errorMessage << ". Got: InOut = " << resIo
840 << ", Output0 = " << resOutput0
841 << ", Output1 = " << resOutput1
842 << ". Using Input0 = " << original.input[elementNdx]
843 << " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";
845 resultCollector.fail(errorMessage.str());
// Test instance: executes one atomic-operation case (see iterate below).
850 class AtomicOperationCaseInstance : public TestInstance
853 AtomicOperationCaseInstance (Context& context,
854 const ShaderSpec& shaderSpec,
855 AtomicShaderType shaderType,
857 AtomicOperation atomicOp);
859 virtual tcu::TestStatus iterate (void);
862 const ShaderSpec& m_shaderSpec;
863 AtomicShaderType m_shaderType;
864 const DataType m_dataType;
865 AtomicOperation m_atomicOp;
// Stores the case parameters; all work happens in iterate().
869 AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context& context,
870 const ShaderSpec& shaderSpec,
871 AtomicShaderType shaderType,
873 AtomicOperation atomicOp)
874 : TestInstance (context)
875 , m_shaderSpec (shaderSpec)
876 , m_shaderType (shaderType)
877 , m_dataType (dataType)
878 , m_atomicOp (atomicOp)
// Runs the test: creates the test-data buffer (plus, for buffer-reference
// cases, a uniform buffer carrying the main buffer's device address),
// fills it with pseudo-random data (fixed seed, deterministic), binds a
// single-descriptor set, executes the shader, invalidates the buffer and
// verifies the results.
882 tcu::TestStatus AtomicOperationCaseInstance::iterate(void)
884 de::UniquePtr<BufferInterface> testBuffer (createTestBuffer(m_dataType, m_atomicOp));
885 tcu::TestLog& log = m_context.getTestContext().getLog();
886 const DeviceInterface& vkd = m_context.getDeviceInterface();
887 const VkDevice device = m_context.getDevice();
888 de::Random rnd (0x62a15e34);
889 const bool useRef = (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
890 const VkDescriptorType descType = (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
891 const VkBufferUsageFlags usageFlags = (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));
893 // The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
894 // a uniform buffer. If not, it will be passed directly as a descriptor.
895 Buffer buffer (m_context, usageFlags, testBuffer->bufferSize(), useRef);
896 std::unique_ptr<Buffer> auxBuffer;
900 // Pass the main buffer address inside a uniform buffer.
901 const VkBufferDeviceAddressInfo addressInfo =
903 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
904 nullptr, // const void* pNext;
905 buffer.getBuffer(), // VkBuffer buffer;
907 const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);
909 auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
910 deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
914 testBuffer->setBuffer(buffer.getHostPtr());
915 testBuffer->fillWithTestData(rnd);
// One descriptor set with a single binding visible to all shader stages;
// the pool allows freeing individual sets.
919 Move<VkDescriptorSetLayout> extraResourcesLayout;
920 Move<VkDescriptorPool> extraResourcesSetPool;
921 Move<VkDescriptorSet> extraResourcesSet;
923 const VkDescriptorSetLayoutBinding bindings[] =
925 { 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
928 const VkDescriptorSetLayoutCreateInfo layoutInfo =
930 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
932 (VkDescriptorSetLayoutCreateFlags)0u,
933 DE_LENGTH_OF_ARRAY(bindings),
937 extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);
939 const VkDescriptorPoolSize poolSizes[] =
944 const VkDescriptorPoolCreateInfo poolInfo =
946 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
948 (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
950 DE_LENGTH_OF_ARRAY(poolSizes),
954 extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);
956 const VkDescriptorSetAllocateInfo allocInfo =
958 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
960 *extraResourcesSetPool,
962 &extraResourcesLayout.get()
965 extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);
// Bind either the uniform buffer holding the address (reference mode)
// or the storage buffer itself.
967 VkDescriptorBufferInfo bufferInfo;
968 bufferInfo.buffer = (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
969 bufferInfo.offset = 0u;
970 bufferInfo.range = VK_WHOLE_SIZE;
972 const VkWriteDescriptorSet descriptorWrite =
974 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
978 0u, // dstArrayElement
981 (const VkDescriptorImageInfo*)DE_NULL,
983 (const VkBufferView*)DE_NULL
986 vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);
988 // Storage for output varying data.
989 std::vector<deUint32> outputs (NUM_ELEMENTS);
990 std::vector<void*> outputPtr (NUM_ELEMENTS);
992 for (size_t i = 0; i < NUM_ELEMENTS; i++)
994 outputs[i] = 0xcdcdcdcd;
995 outputPtr[i] = &outputs[i];
// Shared-memory cases run a single workgroup so all invocations share
// one AtomicStruct; otherwise one invocation per element.
998 const int numWorkGroups = ((m_shaderType.getMemoryType() == AtomicMemoryType::SHARED) ? 1 : static_cast<int>(NUM_ELEMENTS));
999 UniquePtr<ShaderExecutor> executor (createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));
1001 executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
1002 buffer.invalidate();
1004 tcu::ResultCollector resultCollector(log);
1006 // Check the results of the atomic operation
1007 testBuffer->checkResults(resultCollector);
1009 return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
// Test case node: owns the case parameters, builds the shader spec,
// checks feature support and creates AtomicOperationCaseInstance.
1012 class AtomicOperationCase : public TestCase
1015 AtomicOperationCase (tcu::TestContext& testCtx,
1017 const char* description,
1018 AtomicShaderType type,
1020 AtomicOperation atomicOp);
1021 virtual ~AtomicOperationCase (void);
1023 virtual TestInstance* createInstance (Context& ctx) const;
1024 virtual void checkSupport (Context& ctx) const;
1025 virtual void initPrograms (vk::SourceCollections& programCollection) const
1027 generateSources(m_shaderType.getType(), m_shaderSpec, programCollection);
1032 void createShaderSpec();
1033 ShaderSpec m_shaderSpec;
1034 const AtomicShaderType m_shaderType;
1035 const DataType m_dataType;
1036 const AtomicOperation m_atomicOp;
// Stores parameters; presumably also calls createShaderSpec() in the
// (not visible) body — TODO confirm against the full source.
1039 AtomicOperationCase::AtomicOperationCase (tcu::TestContext& testCtx,
1041 const char* description,
1042 AtomicShaderType shaderType,
1044 AtomicOperation atomicOp)
1045 : TestCase (testCtx, name, description)
1046 , m_shaderType (shaderType)
1047 , m_dataType (dataType)
1048 , m_atomicOp (atomicOp)
// Out-of-line destructor (empty body in the original; members are RAII-managed).
1054 AtomicOperationCase::~AtomicOperationCase (void)
// Creates the runtime instance carrying this case's parameters.
1058 TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
1060 return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
// Throws NotSupportedError unless every extension/feature this case needs
// is present: 64-bit integer atomics, 16/32/64-bit float atomics (per op
// and per buffer-vs-shared memory), buffer device address for reference
// mode, and stage-level stores-and-atomics support.
1063 void AtomicOperationCase::checkSupport (Context& ctx) const
1065 if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1067 ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");
1069 const auto atomicInt64Features = ctx.getShaderAtomicInt64Features();
1070 const bool isSharedMemory = (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED);
1072 if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
1074 TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
1076 if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
1078 TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
// float16: all ops live in VK_EXT_shader_atomic_float2.
1082 if (m_dataType == DATA_TYPE_FLOAT16)
1084 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1085 if (m_atomicOp == ATOMIC_OP_ADD)
1087 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1089 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicAdd)
1091 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared add atomic operation not supported");
1096 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicAdd)
1098 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer add atomic operation not supported");
1102 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1104 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1106 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicMinMax)
1108 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared min/max atomic operation not supported");
1113 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicMinMax)
1115 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer min/max atomic operation not supported");
1119 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1121 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1123 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16Atomics)
1125 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared atomic operations not supported");
1130 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16Atomics)
1132 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer atomic operations not supported");
// float32: add/exchange come from VK_EXT_shader_atomic_float,
// min/max from VK_EXT_shader_atomic_float2.
1138 if (m_dataType == DATA_TYPE_FLOAT32)
1140 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
1141 if (m_atomicOp == ATOMIC_OP_ADD)
1143 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1145 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
1147 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
1152 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
1154 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
1158 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1160 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1161 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1163 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat32AtomicMinMax)
1165 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared min/max atomic operation not supported");
1170 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat32AtomicMinMax)
1172 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer min/max atomic operation not supported");
1176 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1178 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1180 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
1182 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
1187 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
1189 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
// float64: same split as float32.
1195 if (m_dataType == DATA_TYPE_FLOAT64)
1197 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
1198 if (m_atomicOp == ATOMIC_OP_ADD)
1200 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1202 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
1204 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
1209 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
1211 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
1215 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1217 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1218 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1220 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat64AtomicMinMax)
1222 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared min/max atomic operation not supported");
1227 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat64AtomicMinMax)
1229 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer min/max atomic operation not supported");
1233 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1235 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1237 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
1239 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
1244 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
1246 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
1252 if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
1254 ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
1257 // Check stores and atomic operation support.
1258 switch (m_shaderType.getType())
1260 case glu::SHADERTYPE_VERTEX:
1261 case glu::SHADERTYPE_TESSELLATION_CONTROL:
1262 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
1263 case glu::SHADERTYPE_GEOMETRY:
1264 if (!ctx.getDeviceFeatures().vertexPipelineStoresAndAtomics)
1265 TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in Vertex, Tessellation, and Geometry shader.");
1267 case glu::SHADERTYPE_FRAGMENT:
1268 if (!ctx.getDeviceFeatures().fragmentStoresAndAtomics)
1269 TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in fragment shader.");
1271 case glu::SHADERTYPE_COMPUTE:
1274 DE_FATAL("Unsupported shader type");
1277 checkSupportShader(ctx, m_shaderType.getType());
// Builds m_shaderSpec for this case: the GLSL global declarations and shader
// body that the ShaderExecutor framework will compile and run. The generated
// code always applies the tested atomic operation to "buf.data"; the
// declarations emitted below arrange the names so that this expression
// resolves to the right storage for the selected memory type (plain SSBO,
// workgroup-shared struct, or buffer reference).
1280 void AtomicOperationCase::createShaderSpec (void)
1282 const AtomicMemoryType memoryType = m_shaderType.getMemoryType();
1284 // Global declarations.
1285 std::ostringstream shaderTemplateGlobalStream;
1287 // Structure in use for atomic operations.
// Field roles (see the operation statements emitted below):
// - inoutValues:        modified atomically in place; each element is targeted
//                       by two invocations ("idx % (N/2)").
// - inputValues:        per-invocation second operand.
// - compareValues:      extra operand used only by comp_swap (COMPARE_ARG).
// - outputValues:       value returned by the atomic call per invocation.
// - invocationHitCount: counters used to bound/guard invocation participation.
1288 shaderTemplateGlobalStream
1289 << "${EXTENSIONS}\n"
1291 << "struct AtomicStruct\n"
1293 << " ${DATATYPE} inoutValues[${N}/2];\n"
1294 << " ${DATATYPE} inputValues[${N}];\n"
1295 << " ${DATATYPE} compareValues[${N}];\n"
1296 << " ${DATATYPE} outputValues[${N}];\n"
1297 << " int invocationHitCount[${N}];\n"
1303 // The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
1304 // as "buf.data", which is the name used in the atomic operation statements.
1306 // * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
1307 // * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliar "buf" struct.
1308 // * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
1310 if (memoryType != AtomicMemoryType::REFERENCE)
1312 shaderTemplateGlobalStream
1313 << "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
1314 << " AtomicStruct data;\n"
1315 << "} ${RESULT_BUFFER_NAME};\n"
1319 // When using global shared memory in the compute variant, invocations will use a shared global structure instead of a
1320 // descriptor set as the sources and results of each tested operation.
1321 if (memoryType == AtomicMemoryType::SHARED)
1323 shaderTemplateGlobalStream
1324 << "shared struct { AtomicStruct data; } buf;\n"
// REFERENCE case: declare a buffer_reference block type for the data plus a
// uniform block that holds the reference itself under the name "buf".
1331 shaderTemplateGlobalStream
1332 << "layout (buffer_reference) buffer AtomicBuffer {\n"
1333 << " AtomicStruct data;\n"
1336 << "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
1337 << " AtomicBuffer buf;\n"
1343 const auto shaderTemplateGlobalString = shaderTemplateGlobalStream.str();
1344 const tcu::StringTemplate shaderTemplateGlobal (shaderTemplateGlobalString);
1346 // Shader body for the non-vertex case.
1347 std::ostringstream nonVertexShaderTemplateStream;
1349 if (memoryType == AtomicMemoryType::SHARED)
1351 // Invocation zero will initialize the shared structure from the descriptor set.
1352 nonVertexShaderTemplateStream
1353 << "if (gl_LocalInvocationIndex == 0u)\n"
1355 << " buf.data = ${RESULT_BUFFER_NAME}.data;\n"
// Fragment stage: helper invocations must not take part in the test, so only
// non-helper invocations reserve an output slot (atomicAdd on the index) and
// execute the tested atomic operation.
1361 if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
1363 nonVertexShaderTemplateStream
1364 << "if (!gl_HelperInvocation) {\n"
1365 << " int idx = atomicAdd(buf.data.index, 1);\n"
1366 << " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
// Other non-vertex stages: cap participation at N invocations using a shared
// hit counter, then use the same reserve-and-operate pattern as above.
1372 nonVertexShaderTemplateStream
1373 << "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
1375 << " int idx = atomicAdd(buf.data.index, 1);\n"
1376 << " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1381 if (memoryType == AtomicMemoryType::SHARED)
1383 // Invocation zero will copy results back to the descriptor set.
1384 nonVertexShaderTemplateStream
1386 << "if (gl_LocalInvocationIndex == 0u)\n"
1388 << " ${RESULT_BUFFER_NAME}.data = buf.data;\n"
1393 const auto nonVertexShaderTemplateStreamStr = nonVertexShaderTemplateStream.str();
1394 const tcu::StringTemplate nonVertexShaderTemplateSrc (nonVertexShaderTemplateStreamStr);
1396 // Shader body for the vertex case.
// The hit-count guard makes each gl_VertexIndex run the operation only on its
// first invocation (protects against the vertex shader running more than once
// per vertex).
1397 const tcu::StringTemplate vertexShaderTemplateSrc(
1398 "int idx = gl_VertexIndex;\n"
1399 "if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
1401 " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
// GLSL extensions required by the tested data type and memory type.
1405 std::ostringstream extensions;
1407 if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1410 << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
1411 << "#extension GL_EXT_shader_atomic_int64 : enable\n"
1414 else if ((m_dataType == DATA_TYPE_FLOAT16) || (m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
1417 << "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable\n"
1418 << "#extension GL_EXT_shader_atomic_float : enable\n"
1419 << "#extension GL_EXT_shader_atomic_float2 : enable\n"
1420 << "#extension GL_KHR_memory_scope_semantics : enable\n"
1424 if (memoryType == AtomicMemoryType::REFERENCE)
1426 extensions << "#extension GL_EXT_buffer_reference : require\n";
// Fill in the template parameters used by the global and body templates above.
1430 std::map<std::string, std::string> specializations;
1432 specializations["EXTENSIONS"] = extensions.str();
1433 specializations["DATATYPE"] = dataType2Str(m_dataType);
1434 specializations["ATOMICOP"] = atomicOp2Str(m_atomicOp);
1435 specializations["SETIDX"] = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
1436 specializations["N"] = de::toString((int)NUM_ELEMENTS);
1437 specializations["COMPARE_ARG"] = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
// In the shared-memory variant "buf" names the shared copy declared above, so
// the descriptor-set SSBO gets the name "result" instead.
1438 specializations["RESULT_BUFFER_NAME"] = ((memoryType == AtomicMemoryType::SHARED) ? "result" : "buf");
1441 m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1442 m_shaderSpec.glslVersion = glu::GLSL_VERSION_450;
1443 m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
1444 m_shaderSpec.source = ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
1445 ? vertexShaderTemplateSrc.specialize(specializations)
1446 : nonVertexShaderTemplateSrc.specialize(specializations));
1448 if (memoryType == AtomicMemoryType::SHARED)
1450 // When using global shared memory, use a single workgroup and an appropriate number of local invocations.
1451 m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
// Registers one AtomicOperationCase per valid combination of atomic operation,
// data type, shader stage and memory backing. Combinations that are invalid by
// construction (unsupported float ops, shared memory outside compute shaders)
// are filtered out inside the loops below.
1455 void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
1457 tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();
// Shader stages under test; the name becomes part of the test case name.
1461 glu::ShaderType type;
1465 { glu::SHADERTYPE_VERTEX, "vertex" },
1466 { glu::SHADERTYPE_FRAGMENT, "fragment" },
1467 { glu::SHADERTYPE_GEOMETRY, "geometry" },
1468 { glu::SHADERTYPE_TESSELLATION_CONTROL, "tess_ctrl" },
1469 { glu::SHADERTYPE_TESSELLATION_EVALUATION, "tess_eval" },
1470 { glu::SHADERTYPE_COMPUTE, "compute" },
// Memory backing variants; the suffix is appended to the test case name
// (empty for the default buffer variant).
1475 AtomicMemoryType type;
1479 { AtomicMemoryType::BUFFER, "" },
1480 { AtomicMemoryType::SHARED, "_shared" },
1481 { AtomicMemoryType::REFERENCE, "_reference" },
// Data types under test, with the human-readable case descriptions.
1488 const char* description;
1491 { DATA_TYPE_FLOAT16,"float16", "Tests using 16-bit float data" },
1492 { DATA_TYPE_INT32, "signed", "Tests using signed data (int)" },
1493 { DATA_TYPE_UINT32, "unsigned", "Tests using unsigned data (uint)" },
1494 { DATA_TYPE_FLOAT32,"float32", "Tests using 32-bit float data" },
1495 { DATA_TYPE_INT64, "signed64bit", "Tests using 64 bit signed data (int64)" },
1496 { DATA_TYPE_UINT64, "unsigned64bit", "Tests using 64 bit unsigned data (uint64)" },
1497 { DATA_TYPE_FLOAT64,"float64", "Tests using 64-bit float data)" } // NOTE(review): stray ')' at the end of this description string; left as-is here because it is runtime data, not a comment.
// Atomic operations under test.
1502 AtomicOperation value;
1506 { ATOMIC_OP_EXCHANGE, "exchange" },
1507 { ATOMIC_OP_COMP_SWAP, "comp_swap" },
1508 { ATOMIC_OP_ADD, "add" },
1509 { ATOMIC_OP_MIN, "min" },
1510 { ATOMIC_OP_MAX, "max" },
1511 { ATOMIC_OP_AND, "and" },
1512 { ATOMIC_OP_OR, "or" },
1513 { ATOMIC_OP_XOR, "xor" }
1516 for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
1518 for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
1520 for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
1522 // Only ADD, MIN, MAX and EXCHANGE are supported on floating-point data; skip every other operation.
1523 if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT16 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
1525 if (atomicOp[opNdx].value != ATOMIC_OP_ADD &&
1526 atomicOp[opNdx].value != ATOMIC_OP_MIN &&
1527 atomicOp[opNdx].value != ATOMIC_OP_MAX &&
1528 atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
1534 for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
1536 // Shared memory only available in compute shaders.
1537 if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE)
// Test case name layout: <op>_<dataType>_<stage><memorySuffix>.
1540 const std::string description = std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
1541 const std::string name = std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;
1543 atomicOperationTestsGroup->addChild(new AtomicOperationCase(testCtx, name.c_str(), description.c_str(), AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type), dataSign[signNdx].dataType, atomicOp[opNdx].value));
// Public factory entry point: creates the "atomic_operations" test group and
// defers population of child cases to the addAtomicOperationTests helper
// defined earlier in this file. Ownership of the returned group passes to the
// caller (the framework's test hierarchy).
1552 tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
1554 return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);