1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2016 The Khronos Group Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 * \file vktImageAtomicOperationTests.cpp
21 * \brief Image atomic operation tests
22 *//*--------------------------------------------------------------------*/
24 #include "vktImageAtomicOperationTests.hpp"
25 #include "vktImageAtomicSpirvShaders.hpp"
27 #include "deUniquePtr.hpp"
28 #include "deStringUtil.hpp"
29 #include "deSTLUtil.hpp"
31 #include "vktTestCaseUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkQueryUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vktImageTestsUtil.hpp"
37 #include "vkBuilderUtil.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkTypeUtil.hpp"
41 #include "vkCmdUtil.hpp"
42 #include "vkObjUtil.hpp"
43 #include "vkBufferWithMemory.hpp"
45 #include "tcuTextureUtil.hpp"
46 #include "tcuTexture.hpp"
47 #include "tcuVectorType.hpp"
48 #include "tcuStringTemplate.hpp"
61 using tcu::TextureFormat;
72 using tcu::Texture2DArray;
73 using tcu::TextureCube;
74 using tcu::PixelBufferAccess;
75 using tcu::ConstPixelBufferAccess;
77 using tcu::TestContext;
// Several invocations hit the same pixel to create contention on the atomic.
enum
{
	NUM_INVOCATIONS_PER_PIXEL = 5u
};
//! Atomic image operations exercised by these tests. The intermediate
//! enumerators (SUB..XOR) are referenced by every switch below.
enum AtomicOperation
{
	ATOMIC_OPERATION_ADD = 0,
	ATOMIC_OPERATION_SUB,
	ATOMIC_OPERATION_INC,
	ATOMIC_OPERATION_DEC,
	ATOMIC_OPERATION_MIN,
	ATOMIC_OPERATION_MAX,
	ATOMIC_OPERATION_AND,
	ATOMIC_OPERATION_OR,
	ATOMIC_OPERATION_XOR,
	ATOMIC_OPERATION_EXCHANGE,
	ATOMIC_OPERATION_COMPARE_EXCHANGE,

	ATOMIC_OPERATION_LAST
};
// How the result image is read back for verification (SPARSE additionally
// checks sparseImageLoadARB results; see commonCheckSupport).
enum class ShaderReadType
{
	NORMAL = 0,
	SPARSE,
};
// Whether the test image uses normal or sparse memory binding.
enum class ImageBackingType
{
	NORMAL = 0,
	SPARSE,
};
113 static string getCoordStr (const ImageType imageType,
114 const std::string& x,
115 const std::string& y,
116 const std::string& z)
121 case IMAGE_TYPE_BUFFER:
123 case IMAGE_TYPE_1D_ARRAY:
125 return string("ivec2(" + x + "," + y + ")");
126 case IMAGE_TYPE_2D_ARRAY:
128 case IMAGE_TYPE_CUBE:
129 case IMAGE_TYPE_CUBE_ARRAY:
130 return string("ivec3(" + x + "," + y + "," + z + ")");
137 static string getComponentTypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
139 DE_ASSERT(intFormat || uintFormat || floatFormat);
141 const bool is64 = (componentWidth == 64);
144 return (is64 ? "int64_t" : "int");
146 return (is64 ? "uint64_t" : "uint");
148 return (is64 ? "double" : "float");
153 static string getVec4TypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
155 DE_ASSERT(intFormat || uintFormat || floatFormat);
157 const bool is64 = (componentWidth == 64);
160 return (is64 ? "i64vec4" : "ivec4");
162 return (is64 ? "u64vec4" : "uvec4");
164 return (is64 ? "f64vec4" : "vec4");
169 static string getAtomicFuncArgumentShaderStr (const AtomicOperation op,
173 const IVec3& gridSize)
177 case ATOMIC_OPERATION_ADD:
178 case ATOMIC_OPERATION_AND:
179 case ATOMIC_OPERATION_OR:
180 case ATOMIC_OPERATION_XOR:
181 return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
182 case ATOMIC_OPERATION_MIN:
183 case ATOMIC_OPERATION_MAX:
184 // multiply by (1-2*(value % 2) to make half of the data negative
185 // this will result in generating large numbers for uint formats
186 return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
187 case ATOMIC_OPERATION_EXCHANGE:
188 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
189 return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
196 static string getAtomicOperationCaseName (const AtomicOperation op)
200 case ATOMIC_OPERATION_ADD: return string("add");
201 case ATOMIC_OPERATION_SUB: return string("sub");
202 case ATOMIC_OPERATION_INC: return string("inc");
203 case ATOMIC_OPERATION_DEC: return string("dec");
204 case ATOMIC_OPERATION_MIN: return string("min");
205 case ATOMIC_OPERATION_MAX: return string("max");
206 case ATOMIC_OPERATION_AND: return string("and");
207 case ATOMIC_OPERATION_OR: return string("or");
208 case ATOMIC_OPERATION_XOR: return string("xor");
209 case ATOMIC_OPERATION_EXCHANGE: return string("exchange");
210 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("compare_exchange");
217 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
221 case ATOMIC_OPERATION_ADD: return string("imageAtomicAdd");
222 case ATOMIC_OPERATION_MIN: return string("imageAtomicMin");
223 case ATOMIC_OPERATION_MAX: return string("imageAtomicMax");
224 case ATOMIC_OPERATION_AND: return string("imageAtomicAnd");
225 case ATOMIC_OPERATION_OR: return string("imageAtomicOr");
226 case ATOMIC_OPERATION_XOR: return string("imageAtomicXor");
227 case ATOMIC_OPERATION_EXCHANGE: return string("imageAtomicExchange");
228 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("imageAtomicCompSwap");
235 template <typename T>
236 T getOperationInitialValue (const AtomicOperation op)
240 // \note 18 is just an arbitrary small nonzero value.
241 case ATOMIC_OPERATION_ADD: return 18;
242 case ATOMIC_OPERATION_INC: return 18;
243 case ATOMIC_OPERATION_SUB: return (1 << 24) - 1;
244 case ATOMIC_OPERATION_DEC: return (1 << 24) - 1;
245 case ATOMIC_OPERATION_MIN: return (1 << 15) - 1;
246 case ATOMIC_OPERATION_MAX: return 18;
247 case ATOMIC_OPERATION_AND: return (1 << 15) - 1;
248 case ATOMIC_OPERATION_OR: return 18;
249 case ATOMIC_OPERATION_XOR: return 18;
250 case ATOMIC_OPERATION_EXCHANGE: return 18;
251 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 18;
259 deInt64 getOperationInitialValue<deInt64>(const AtomicOperation op)
263 // \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
264 case ATOMIC_OPERATION_ADD: return 0x000000BEFFFFFF18;
265 case ATOMIC_OPERATION_INC: return 0x000000BEFFFFFF18;
266 case ATOMIC_OPERATION_SUB: return (1ull << 56) - 1;
267 case ATOMIC_OPERATION_DEC: return (1ull << 56) - 1;
268 case ATOMIC_OPERATION_MIN: return (1ull << 47) - 1;
269 case ATOMIC_OPERATION_MAX: return 0x000000BEFFFFFF18;
270 case ATOMIC_OPERATION_AND: return (1ull << 47) - 1;
271 case ATOMIC_OPERATION_OR: return 0x000000BEFFFFFF18;
272 case ATOMIC_OPERATION_XOR: return 0x000000BEFFFFFF18;
273 case ATOMIC_OPERATION_EXCHANGE: return 0x000000BEFFFFFF18;
274 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 0x000000BEFFFFFF18;
277 return 0xFFFFFFFFFFFFFFFF;
282 deUint64 getOperationInitialValue<deUint64>(const AtomicOperation op)
284 return (deUint64)getOperationInitialValue<deInt64>(op);
288 template <typename T>
289 static T getAtomicFuncArgument (const AtomicOperation op,
290 const IVec3& invocationID,
291 const IVec3& gridSize)
293 const T x = static_cast<T>(invocationID.x());
294 const T y = static_cast<T>(invocationID.y());
295 const T z = static_cast<T>(invocationID.z());
299 // \note Fall-throughs.
300 case ATOMIC_OPERATION_ADD:
301 case ATOMIC_OPERATION_SUB:
302 case ATOMIC_OPERATION_AND:
303 case ATOMIC_OPERATION_OR:
304 case ATOMIC_OPERATION_XOR:
305 return x*x + y*y + z*z;
306 case ATOMIC_OPERATION_INC:
307 case ATOMIC_OPERATION_DEC:
309 case ATOMIC_OPERATION_MIN:
310 case ATOMIC_OPERATION_MAX:
311 // multiply half of the data by -1
312 return (1-2*(x % 2))*(x*x + y*y + z*z);
313 case ATOMIC_OPERATION_EXCHANGE:
314 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
315 return (z*static_cast<T>(gridSize.x()) + x)*static_cast<T>(gridSize.y()) + y;
322 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
323 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
325 return op == ATOMIC_OPERATION_ADD ||
326 op == ATOMIC_OPERATION_SUB ||
327 op == ATOMIC_OPERATION_INC ||
328 op == ATOMIC_OPERATION_DEC ||
329 op == ATOMIC_OPERATION_MIN ||
330 op == ATOMIC_OPERATION_MAX ||
331 op == ATOMIC_OPERATION_AND ||
332 op == ATOMIC_OPERATION_OR ||
333 op == ATOMIC_OPERATION_XOR;
336 //! Checks if the operation needs an SPIR-V shader.
337 static bool isSpirvAtomicOperation (const AtomicOperation op)
339 return op == ATOMIC_OPERATION_SUB ||
340 op == ATOMIC_OPERATION_INC ||
341 op == ATOMIC_OPERATION_DEC;
344 //! Returns the SPIR-V assembler name of the given operation.
345 static std::string getSpirvAtomicOpName (const AtomicOperation op)
349 case ATOMIC_OPERATION_SUB: return "OpAtomicISub";
350 case ATOMIC_OPERATION_INC: return "OpAtomicIIncrement";
351 case ATOMIC_OPERATION_DEC: return "OpAtomicIDecrement";
359 //! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
360 static bool isSpirvAtomicNoLastArgOp (const AtomicOperation op)
364 case ATOMIC_OPERATION_SUB: return false;
365 case ATOMIC_OPERATION_INC: // fallthrough
366 case ATOMIC_OPERATION_DEC: return true;
374 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
375 template <typename T>
376 static T computeBinaryAtomicOperationResult (const AtomicOperation op, const T a, const T b)
380 case ATOMIC_OPERATION_INC: // fallthrough.
381 case ATOMIC_OPERATION_ADD: return a + b;
382 case ATOMIC_OPERATION_DEC: // fallthrough.
383 case ATOMIC_OPERATION_SUB: return a - b;
384 case ATOMIC_OPERATION_MIN: return de::min(a, b);
385 case ATOMIC_OPERATION_MAX: return de::max(a, b);
386 case ATOMIC_OPERATION_AND: return a & b;
387 case ATOMIC_OPERATION_OR: return a | b;
388 case ATOMIC_OPERATION_XOR: return a ^ b;
389 case ATOMIC_OPERATION_EXCHANGE: return b;
390 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
397 VkImageUsageFlags getUsageFlags (bool useTransfer)
399 VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;
402 usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
//! Registers the auxiliary compute programs used by the atomic tests:
//!  - "fillShader":   writes a linear input SSBO into the storage image (one texel per invocation).
//!  - "readShader":   reads the image back into an output SSBO.
//!  - "readShaderResidency": like "readShader", but also cross-checks the value
//!    returned by sparseImageLoadARB; only added for image types that can be
//!    sparse (i.e. not 1D, 1D array or buffer).
//! NOTE(review): this chunk appears to have physical lines elided (missing
//! braces and shader-string fragments); only comments were added here.
void AddFillReadShader (SourceCollections&			sourceCollections,
						const ImageType&			imageType,
						const tcu::TextureFormat&	format,
						const string&				componentType,
						const string&				vec4Type)
	const string imageInCoord			= getCoordStr(imageType, "gx", "gy", "gz");
	const string shaderImageFormatStr	= getShaderImageFormatQualifier(format);
	const string shaderImageTypeStr		= getShaderImageType(format, imageType);
	const auto componentWidth			= getFormatComponentWidth(mapTextureFormat(format), 0u);
	// 64-bit components need the explicit int64 arithmetic + int64 image extensions.
	const string extensions				= ((componentWidth == 64u)
										? "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
										"#extension GL_EXT_shader_image_int64 : require\n"
	// Fill: image binding 0 is written from SSBO binding 1, indexed linearly by global invocation ID.
	const string fillShader = "#version 450\n"
		"precision highp " + shaderImageTypeStr + ";\n"
		"layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
		"layout(std430, binding = 1) buffer inputBuffer\n"
		" "+ componentType + " data[];\n"
		" int gx = int(gl_GlobalInvocationID.x);\n"
		" int gy = int(gl_GlobalInvocationID.y);\n"
		" int gz = int(gl_GlobalInvocationID.z);\n"
		" uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
		" imageStore(u_resultImage, " + imageInCoord + ", " + vec4Type + "(inBuffer.data[index]));\n"
	// Read: the mirror image of the fill shader — image binding 0 into SSBO binding 1.
	const string readShader = "#version 450\n"
		"precision highp " + shaderImageTypeStr + ";\n"
		"layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
		"layout(std430, binding = 1) buffer outputBuffer\n"
		" " + componentType + " data[];\n"
		" int gx = int(gl_GlobalInvocationID.x);\n"
		" int gy = int(gl_GlobalInvocationID.y);\n"
		" int gz = int(gl_GlobalInvocationID.z);\n"
		" uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
		" outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
	// Residency variant only makes sense for image types that can be sparse.
	if ((imageType != IMAGE_TYPE_1D) &&
		(imageType != IMAGE_TYPE_1D_ARRAY) &&
		(imageType != IMAGE_TYPE_BUFFER))
		// On mismatch between imageLoad and sparseImageLoadARB, poison the output
		// with 1234 so verification fails visibly.
		const string readShaderResidency = "#version 450\n"
			"#extension GL_ARB_sparse_texture2 : require\n"
			"precision highp " + shaderImageTypeStr + ";\n"
			"layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
			"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
			"layout(std430, binding = 1) buffer outputBuffer\n"
			" " + componentType + " data[];\n"
			" int gx = int(gl_GlobalInvocationID.x);\n"
			" int gy = int(gl_GlobalInvocationID.y);\n"
			" int gz = int(gl_GlobalInvocationID.z);\n"
			" uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
			" outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
			" " + vec4Type + " sparseValue;\n"
			" sparseImageLoadARB(u_resultImage, " + imageInCoord + ", sparseValue);\n"
			" if (outBuffer.data[index] != sparseValue.x)\n"
			" outBuffer.data[index] = " + vec4Type + "(1234).x;\n"
		sourceCollections.glslSources.add("readShaderResidency") << glu::ComputeSource(readShaderResidency.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
	// SPIR-V 1.3 build options are required for the int64 image extension path.
	sourceCollections.glslSources.add("fillShader") << glu::ComputeSource(fillShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
	sourceCollections.glslSources.add("readShader") << glu::ComputeSource(readShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
503 //! Prepare the initial data for the image
504 static void initDataForImage (const VkDevice device,
505 const DeviceInterface& deviceInterface,
506 const TextureFormat& format,
507 const AtomicOperation operation,
508 const tcu::UVec3& gridSize,
509 BufferWithMemory& buffer)
511 Allocation& bufferAllocation = buffer.getAllocation();
512 const VkFormat imageFormat = mapTextureFormat(format);
513 tcu::PixelBufferAccess pixelBuffer (format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());
515 if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
517 const deInt64 initialValue(getOperationInitialValue<deInt64>(operation));
519 for (deUint32 z = 0; z < gridSize.z(); z++)
520 for (deUint32 y = 0; y < gridSize.y(); y++)
521 for (deUint32 x = 0; x < gridSize.x(); x++)
523 *((deInt64*)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
528 const tcu::IVec4 initialValue(getOperationInitialValue<deInt32>(operation));
530 for (deUint32 z = 0; z < gridSize.z(); z++)
531 for (deUint32 y = 0; y < gridSize.y(); y++)
532 for (deUint32 x = 0; x < gridSize.x(); x++)
534 pixelBuffer.setPixel(initialValue, x, y, z);
538 flushAlloc(deviceInterface, device, bufferAllocation);
//! Shared checkSupport() implementation for all atomic image test cases.
//! Throws NotSupportedError for missing optional features and TCU_FAILs when a
//! feature mandated by the spec for this format is absent.
//! NOTE(review): this chunk appears to have physical lines elided (missing
//! braces/else lines); only comments were added here.
void commonCheckSupport (Context& context, const tcu::TextureFormat& tcuFormat, VkImageTiling tiling, ImageType imageType, const tcu::UVec3& imageSize, AtomicOperation operation, bool useTransfer, ShaderReadType readType, ImageBackingType backingType)
	const VkFormat				format				= mapTextureFormat(tcuFormat);
	const VkImageType			vkImgType			= mapImageType(imageType);
	const VkFormatFeatureFlags	texelBufferSupport	= (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);
	const auto&					vki					= context.getInstanceInterface();
	const auto					physicalDevice		= context.getPhysicalDevice();
	const auto					usageFlags			= getUsageFlags(useTransfer);
	// Ask the implementation whether this format/type/tiling/usage combination is creatable at all.
	VkImageFormatProperties vkImageFormatProperties;
	const auto result = vki.getPhysicalDeviceImageFormatProperties(physicalDevice, format, vkImgType, tiling, usageFlags, 0, &vkImageFormatProperties);
	if (result != VK_SUCCESS) {
		if (result == VK_ERROR_FORMAT_NOT_SUPPORTED)
			TCU_THROW(NotSupportedError, "Format unsupported for tiling");
		TCU_FAIL("vkGetPhysicalDeviceImageFormatProperties returned unexpected error");
	if (vkImageFormatProperties.maxArrayLayers < (uint32_t)getNumLayers(imageType, imageSize)) {
		TCU_THROW(NotSupportedError, "This format and tiling combination does not support this number of aray layers");
	const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(context.getInstanceInterface(),
																				  context.getPhysicalDevice(), format);
	// Buffer-backed images need storage texel buffer atomics instead of image atomics.
	if ((imageType == IMAGE_TYPE_BUFFER) &&
		((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
		TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");
	const VkFormatFeatureFlags requiredFeaturesLinear = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
	if (tiling == vk::VK_IMAGE_TILING_LINEAR &&
		((formatProperties.linearTilingFeatures & requiredFeaturesLinear) != requiredFeaturesLinear)
		TCU_THROW(NotSupportedError, "Format doesn't support atomic storage with linear tiling");
	if (imageType == IMAGE_TYPE_CUBE_ARRAY)
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);
#ifndef CTS_USES_VULKANSC
	// Sparse backing needs binding + per-image-type residency features plus format support.
	if (backingType == ImageBackingType::SPARSE)
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
			case VK_IMAGE_TYPE_2D:	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D); break;
			case VK_IMAGE_TYPE_3D:	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D); break;
			default:				DE_ASSERT(false); break;
		if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format, vkImgType, VK_SAMPLE_COUNT_1_BIT, usageFlags, tiling))
			TCU_THROW(NotSupportedError, "Format does not support sparse images");
#endif // CTS_USES_VULKANSC
	// Float formats depend on VK_EXT_shader_atomic_float (and _float2 for min/max).
	if (isFloatFormat(format))
		context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
		const VkFormatFeatureFlags	requiredFeatures	= (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
		const auto&					atomicFloatFeatures	= context.getShaderAtomicFloatFeaturesEXT();
		if (!atomicFloatFeatures.shaderImageFloat32Atomics)
			TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");
		if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
			TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");
		if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
			context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
			if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
				TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
#endif // CTS_USES_VULKANSC
		if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
			TCU_FAIL("Required format feature bits not supported");
		if (backingType == ImageBackingType::SPARSE)
			if (!atomicFloatFeatures.sparseImageFloat32Atomics)
				TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");
			if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
				TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
	// 64-bit integer formats depend on VK_EXT_shader_image_atomic_int64.
	else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
		context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");
		const VkFormatFeatureFlags	requiredFeatures	= (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
		const auto&					atomicInt64Features	= context.getShaderImageAtomicInt64FeaturesEXT();
		if (!atomicInt64Features.shaderImageInt64Atomics)
			TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");
		if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
			TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");
		if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
			TCU_FAIL("Mandatory format features not supported");
	// Transfer-based upload/readback needs transfer src/dst support for the format.
	const VkFormatFeatureFlags transferFeatures = (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
	if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
		TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
	if (readType == ShaderReadType::SPARSE)
		// Sparse reads use sparseImageLoadARB; not applicable to 1D/1D-array/buffer images.
		DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
665 class BinaryAtomicEndResultCase : public vkt::TestCase
668 BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
670 const string& description,
671 const ImageType imageType,
672 const tcu::UVec3& imageSize,
673 const tcu::TextureFormat& format,
674 const VkImageTiling tiling,
675 const AtomicOperation operation,
676 const bool useTransfer,
677 const ShaderReadType shaderReadType,
678 const ImageBackingType backingType,
679 const glu::GLSLVersion glslVersion);
681 void initPrograms (SourceCollections& sourceCollections) const;
682 TestInstance* createInstance (Context& context) const;
683 virtual void checkSupport (Context& context) const;
686 const ImageType m_imageType;
687 const tcu::UVec3 m_imageSize;
688 const tcu::TextureFormat m_format;
689 const VkImageTiling m_tiling;
690 const AtomicOperation m_operation;
691 const bool m_useTransfer;
692 const ShaderReadType m_readType;
693 const ImageBackingType m_backingType;
694 const glu::GLSLVersion m_glslVersion;
697 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
699 const string& description,
700 const ImageType imageType,
701 const tcu::UVec3& imageSize,
702 const tcu::TextureFormat& format,
703 const VkImageTiling tiling,
704 const AtomicOperation operation,
705 const bool useTransfer,
706 const ShaderReadType shaderReadType,
707 const ImageBackingType backingType,
708 const glu::GLSLVersion glslVersion)
709 : TestCase (testCtx, name, description)
710 , m_imageType (imageType)
711 , m_imageSize (imageSize)
714 , m_operation (operation)
715 , m_useTransfer (useTransfer)
716 , m_readType (shaderReadType)
717 , m_backingType (backingType)
718 , m_glslVersion (glslVersion)
722 void BinaryAtomicEndResultCase::checkSupport (Context& context) const
724 commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType, m_backingType);
//! Builds the compute shader performing the atomic on binding 0. SUB/INC/DEC
//! use a SPIR-V assembly template; all other operations use a generated GLSL
//! shader whose coordinate wraps gx so NUM_INVOCATIONS_PER_PIXEL invocations
//! contend on each pixel.
//! NOTE(review): this chunk appears to have physical lines elided (missing
//! braces/continuations); only comments were added here.
void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
	const VkFormat	imageFormat		= mapTextureFormat(m_format);
	const deUint32	componentWidth	= getFormatComponentWidth(imageFormat, 0);
	const bool		intFormat		= isIntFormat(imageFormat);
	const bool		uintFormat		= isUintFormat(imageFormat);
	const bool		floatFormat		= isFloatFormat(imageFormat);
	const string	type			= getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
	const string	vec4Type		= getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
	AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
	if (isSpirvAtomicOperation(m_operation))
		// SPIR-V template path: specialize op name (and drop the data argument for INC/DEC).
		const CaseVariant					caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
		const tcu::StringTemplate			shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
		std::map<std::string, std::string>	specializations;
		specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
		if (isSpirvAtomicNoLastArgOp(m_operation))
			specializations["LASTARG"] = "";
		sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
		const string versionDecl	= glu::getGLSLVersionDeclaration(m_glslVersion);
		const UVec3 gridSize		= getShaderGridSize(m_imageType, m_imageSize);
		// "gx % gridSize.x" folds the widened dispatch back onto the image so invocations contend.
		const string atomicCoord	= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
		const string atomicArgExpr	= type + getAtomicFuncArgumentShaderStr(m_operation,
										IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
		// COMPARE_EXCHANGE compare value matches getOperationInitialValue (18 / 0xBEFFFFFF18 == 820338753304).
		const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
										  (componentWidth == 64 ?", 820338753304": ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "")
		const string atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
		const string shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
		const string shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);
		const string extensions				= "#extension GL_EXT_shader_atomic_float : enable\n"
											  "#extension GL_EXT_shader_atomic_float2 : enable\n"
											  "#extension GL_KHR_memory_scope_semantics : enable";
		string source = versionDecl + "\n" + extensions + "\n";
		if (64 == componentWidth)
			source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
					  "#extension GL_EXT_shader_image_int64 : require\n";
		source += "precision highp " + shaderImageTypeStr + ";\n"
			"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
			"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
			" int gx = int(gl_GlobalInvocationID.x);\n"
			" int gy = int(gl_GlobalInvocationID.y);\n"
			" int gz = int(gl_GlobalInvocationID.z);\n"
			" " + atomicInvocation + ";\n"
		sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
797 class BinaryAtomicIntermValuesCase : public vkt::TestCase
800 BinaryAtomicIntermValuesCase (tcu::TestContext& testCtx,
802 const string& description,
803 const ImageType imageType,
804 const tcu::UVec3& imageSize,
805 const tcu::TextureFormat& format,
806 const VkImageTiling tiling,
807 const AtomicOperation operation,
808 const bool useTransfer,
809 const ShaderReadType shaderReadType,
810 const ImageBackingType backingType,
811 const glu::GLSLVersion glslVersion);
813 void initPrograms (SourceCollections& sourceCollections) const;
814 TestInstance* createInstance (Context& context) const;
815 virtual void checkSupport (Context& context) const;
818 const ImageType m_imageType;
819 const tcu::UVec3 m_imageSize;
820 const tcu::TextureFormat m_format;
821 const VkImageTiling m_tiling;
822 const AtomicOperation m_operation;
823 const bool m_useTransfer;
824 const ShaderReadType m_readType;
825 const ImageBackingType m_backingType;
826 const glu::GLSLVersion m_glslVersion;
829 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext& testCtx,
831 const string& description,
832 const ImageType imageType,
833 const tcu::UVec3& imageSize,
834 const TextureFormat& format,
835 const VkImageTiling tiling,
836 const AtomicOperation operation,
837 const bool useTransfer,
838 const ShaderReadType shaderReadType,
839 const ImageBackingType backingType,
840 const glu::GLSLVersion glslVersion)
841 : TestCase (testCtx, name, description)
842 , m_imageType (imageType)
843 , m_imageSize (imageSize)
846 , m_operation (operation)
847 , m_useTransfer (useTransfer)
848 , m_readType (shaderReadType)
849 , m_backingType (backingType)
850 , m_glslVersion (glslVersion)
854 void BinaryAtomicIntermValuesCase::checkSupport (Context& context) const
856 commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType, m_backingType);
//! Like BinaryAtomicEndResultCase::initPrograms, but the generated shader also
//! stores each atomic's returned (pre-op) value into a second image (binding 1)
//! at the unwrapped invocation coordinate, for intermediate-value checking.
//! NOTE(review): this chunk appears to have physical lines elided (missing
//! braces/continuations); only comments were added here.
void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
	const VkFormat	imageFormat		= mapTextureFormat(m_format);
	const deUint32	componentWidth	= getFormatComponentWidth(imageFormat, 0);
	const bool		intFormat		= isIntFormat(imageFormat);
	const bool		uintFormat		= isUintFormat(imageFormat);
	const bool		floatFormat		= isFloatFormat(imageFormat);
	const string	type			= getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
	const string	vec4Type		= getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
	AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
	if (isSpirvAtomicOperation(m_operation))
		// SPIR-V template path, intermediate-results variant.
		const CaseVariant					caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
		const tcu::StringTemplate			shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
		std::map<std::string, std::string>	specializations;
		specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
		if (isSpirvAtomicNoLastArgOp(m_operation))
			specializations["LASTARG"] = "";
		sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
		const string versionDecl		= glu::getGLSLVersionDeclaration(m_glslVersion);
		const UVec3 gridSize			= getShaderGridSize(m_imageType, m_imageSize);
		// Wrapped coordinate for the contended atomic; unwrapped one for the per-invocation store.
		const string atomicCoord		= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
		const string invocationCoord	= getCoordStr(m_imageType, "gx", "gy", "gz");
		const string atomicArgExpr		= type + getAtomicFuncArgumentShaderStr(m_operation,
											IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
		// COMPARE_EXCHANGE compare value matches getOperationInitialValue (18 / 0xBEFFFFFF18 == 820338753304).
		const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
										  (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "") :
		const string atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) +
											  "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
		const string shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
		const string shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);
		const string extensions				= "#extension GL_EXT_shader_atomic_float : enable\n"
											  "#extension GL_EXT_shader_atomic_float2 : enable\n"
											  "#extension GL_KHR_memory_scope_semantics : enable";
		string source = versionDecl + "\n" + extensions + "\n"
		if (64 == componentWidth)
			source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
					  "#extension GL_EXT_shader_image_int64 : require\n";
		source += "precision highp " + shaderImageTypeStr + "; \n"
			"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
			"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
			"layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
			" int gx = int(gl_GlobalInvocationID.x);\n"
			" int gy = int(gl_GlobalInvocationID.y);\n"
			" int gz = int(gl_GlobalInvocationID.z);\n"
			" imageStore(u_intermValuesImage, " + invocationCoord + ", " + vec4Type + "(" + atomicInvocation + "));\n"
		sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
//! Common test-instance base: owns the result image, descriptor machinery and
//! staging buffers; iterate() drives the fill/atomic/read pipeline while the
//! pure-virtual hooks let the end-result and intermediate-value subclasses
//! customize resource setup, command recording and verification.
//! NOTE(review): this chunk appears to have physical lines elided (braces,
//! access specifiers and at least one constructor parameter); only comments
//! were added here.
class BinaryAtomicInstanceBase : public vkt::TestInstance
	BinaryAtomicInstanceBase (Context&				context,
							  const ImageType		imageType,
							  const tcu::UVec3&		imageSize,
							  const TextureFormat&	format,
							  const VkImageTiling	tiling,
							  const AtomicOperation	operation,
							  const bool			useTransfer,
							  const ShaderReadType	shaderReadType,
							  const ImageBackingType backingType);
	// Runs the whole test: setup, dispatch, readback, verification.
	tcu::TestStatus iterate (void);
	// --- Hooks implemented by the end-result / intermediate-values subclasses. ---
	virtual deUint32	getOutputBufferSize		(void) const = 0;
	virtual void		prepareResources		(const bool useTransfer) = 0;
	virtual void		prepareDescriptors		(const bool isTexelBuffer) = 0;
	virtual void		commandsBeforeCompute	(const VkCommandBuffer	cmdBuffer) const = 0;
	virtual void		commandsAfterCompute	(const VkCommandBuffer	cmdBuffer,
												 const VkPipeline		pipeline,
												 const VkPipelineLayout	pipelineLayout,
												 const VkDescriptorSet	descriptorSet,
												 const VkDeviceSize&	range,
												 const bool				useTransfer) = 0;
	virtual bool		verifyResult			(Allocation&			outputBufferAllocation,
												 const bool				is64Bit) const = 0;
	// Uploads initial data through the "fillShader" when transfer is not used.
	void shaderFillImage	(const VkCommandBuffer	cmdBuffer,
							 const VkBuffer&		buffer,
							 const VkPipeline		pipeline,
							 const VkPipelineLayout	pipelineLayout,
							 const VkDescriptorSet	descriptorSet,
							 const VkDeviceSize&	range,
							 const tcu::UVec3&		gridSize);
	void createImageAndView	(VkFormat				imageFormat,
							 const tcu::UVec3&		imageExent,
							 de::MovePtr<Image>&	imagePtr,
							 Move<VkImageView>&		imageViewPtr);
	void createImageResources (const VkFormat&		imageFormat,
							   const bool			useTransfer);
	// --- Immutable test parameters. ---
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const TextureFormat			m_format;
	const VkImageTiling			m_tiling;
	const AtomicOperation		m_operation;
	const bool					m_useTransfer;
	const ShaderReadType		m_readType;
	const ImageBackingType		m_backingType;
	// --- Vulkan resources owned by the instance. ---
	de::MovePtr<BufferWithMemory>	m_inputBuffer;
	de::MovePtr<BufferWithMemory>	m_outputBuffer;
	Move<VkBufferView>				m_descResultBufferView;
	Move<VkBufferView>				m_descIntermResultsBufferView;
	Move<VkDescriptorPool>			m_descriptorPool;
	Move<VkDescriptorSetLayout>		m_descriptorSetLayout;
	Move<VkDescriptorSet>			m_descriptorSet;
	Move<VkDescriptorSetLayout>		m_descriptorSetLayoutNoTransfer;
	Move<VkDescriptorPool>			m_descriptorPoolNoTransfer;
	de::MovePtr<Image>				m_resultImage;
	Move<VkImageView>				m_resultImageView;
	// Semaphores the compute submission must wait on (sparse binding signals these).
	std::vector<VkSemaphore>		m_waitSemaphores;
// Constructor only records the test configuration; all Vulkan resources are
// created lazily in iterate().
BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context& context,
													const ImageType imageType,
													const tcu::UVec3& imageSize,
													const TextureFormat& format,
													const VkImageTiling tiling,
													const AtomicOperation operation,
													const bool useTransfer,
													const ShaderReadType shaderReadType,
													const ImageBackingType backingType)
	: vkt::TestInstance (context)
	, m_imageType (imageType)
	, m_imageSize (imageSize)
	, m_operation (operation)
	, m_useTransfer (useTransfer)
	, m_readType (shaderReadType)
	, m_backingType (backingType)
// Runs the complete test: builds image and buffer resources, uploads the
// initial image data, dispatches the atomic-operation compute shader with
// NUM_INVOCATIONS_PER_PIXEL invocations per pixel, reads the results back to
// a host-visible buffer and verifies them.
tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
	const VkDevice device = m_context.getDevice();
	const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
	const VkQueue queue = m_context.getUniversalQueue();
	const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
	Allocator& allocator = m_context.getDefaultAllocator();
	const VkDeviceSize imageSizeInBytes = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
	const VkDeviceSize outBuffSizeInBytes = getOutputBufferSize();
	const VkFormat imageFormat = mapTextureFormat(m_format);
	const bool isTexelBuffer = (m_imageType == IMAGE_TYPE_BUFFER);

	createImageResources(imageFormat, m_useTransfer);

	tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);

	// Prepare the buffer with the initial data for the image. For texel
	// buffers this same buffer later serves as the atomic target.
	m_inputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(deviceInterface,
		makeBufferCreateInfo(imageSizeInBytes,
							 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
							 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
							 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
		MemoryRequirement::HostVisible));

	// Fill in buffer with initial data used for image.
	initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);

	// Create a buffer to store shader output copied from result image.
	m_outputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(deviceInterface,
		makeBufferCreateInfo(outBuffSizeInBytes,
							 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
							 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
							 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
		MemoryRequirement::HostVisible));

	// Subclass-specific resources and the atomic shader's descriptors.
	prepareResources(m_useTransfer);

	prepareDescriptors(isTexelBuffer);

	// Objects for the no-transfer path: one pipeline fills the image with the
	// initial data, another reads the results back via a compute shader.
	Move<VkDescriptorSet> descriptorSetFillImage;
	Move<VkShaderModule> shaderModuleFillImage;
	Move<VkPipelineLayout> pipelineLayoutFillImage;
	Move<VkPipeline> pipelineFillImage;

	Move<VkDescriptorSet> descriptorSetReadImage;
	Move<VkShaderModule> shaderModuleReadImage;
	Move<VkPipelineLayout> pipelineLayoutReadImage;
	Move<VkPipeline> pipelineReadImage;

	// Fill and read pipelines share the same set layout: one storage image or
	// texel buffer (binding 0) plus one storage buffer (binding 1).
	m_descriptorSetLayoutNoTransfer =
		DescriptorSetLayoutBuilder()
		.addSingleBinding((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), VK_SHADER_STAGE_COMPUTE_BIT)
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(deviceInterface, device);

	m_descriptorPoolNoTransfer =
		DescriptorPoolBuilder()
		.addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), 2)
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);

	descriptorSetFillImage = makeDescriptorSet(deviceInterface,
											   *m_descriptorPoolNoTransfer,
											   *m_descriptorSetLayoutNoTransfer);

	descriptorSetReadImage = makeDescriptorSet(deviceInterface,
											   *m_descriptorPoolNoTransfer,
											   *m_descriptorSetLayoutNoTransfer);

	shaderModuleFillImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
	pipelineLayoutFillImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
	pipelineFillImage = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);

	// Sparse images use a read-shader variant that also checks residency.
	if (m_readType == ShaderReadType::SPARSE)
		shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShaderResidency"), 0);
		shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);

	pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
	// NOTE(review): the fill pipeline's layout is reused to create the read
	// pipeline. Both layouts are built from m_descriptorSetLayoutNoTransfer,
	// so they are compatible, but pipelineLayoutReadImage would be the
	// consistent choice here — confirm this is intentional.
	pipelineReadImage = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleReadImage);

	// Pipeline performing the atomic operations under test.
	const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
	const Unique<VkPipeline> pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));

	// Create command buffer
	const Unique<VkCommandPool> cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
	const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	beginCommandBuffer(deviceInterface, *cmdBuffer);

	// Upload the initial data into the result image with a transfer copy.
	const vector<VkBufferImageCopy> bufferImageCopy(1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize)));
	copyBufferToImage(deviceInterface,
					  VK_IMAGE_ASPECT_COLOR_BIT,
					  getNumLayers(m_imageType, m_imageSize), m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);

	// Initialize the result image via the fill compute shader.
	shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage, *descriptorSetFillImage, imageSizeInBytes, gridSize);

	commandsBeforeCompute(*cmdBuffer);

	// Dispatch the atomic shader over the extended grid (x scaled by the
	// number of invocations per pixel).
	deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

	deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());

	// Subclass readback (transfer copy or read-back compute dispatch).
	commandsAfterCompute(*cmdBuffer,
						 *pipelineLayoutReadImage,
						 *descriptorSetReadImage,

	// Make the output buffer writes visible to the host before mapping.
	const VkBufferMemoryBarrier outputBufferPreHostReadBarrier
		= makeBufferMemoryBarrier(((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
								  VK_ACCESS_HOST_READ_BIT,
								  m_outputBuffer->get(),
								  outBuffSizeInBytes);

	deviceInterface.cmdPipelineBarrier(*cmdBuffer,
									   ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
									   VK_PIPELINE_STAGE_HOST_BIT,
									   DE_FALSE, 0u, DE_NULL,
									   1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);

	endCommandBuffer(deviceInterface, *cmdBuffer);

	// Wait on sparse-binding semaphores (if any) before execution.
	std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
	submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
						  static_cast<deUint32>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores), de::dataOrNull(waitStages));

	Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();

	invalidateAlloc(deviceInterface, device, outputBufferAllocation);

	// 64-bit formats require 64-bit host-side comparison.
	if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
		return tcu::TestStatus::pass("Comparison succeeded");

	return tcu::TestStatus::fail("Comparison failed");
// Initializes m_resultImage on the GPU using the fill compute pipeline:
// binds the result image (binding 0) and the initial-data buffer (binding 1),
// transitions the image to GENERAL, dispatches one invocation per pixel and
// then makes the writes visible to subsequent compute-shader reads.
void BinaryAtomicInstanceBase::shaderFillImage (const VkCommandBuffer cmdBuffer,
												const VkBuffer& buffer,
												const VkPipeline pipeline,
												const VkPipelineLayout pipelineLayout,
												const VkDescriptorSet descriptorSet,
												const VkDeviceSize& range,
												const tcu::UVec3& gridSize)
	const VkDevice device = m_context.getDevice();
	const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
	const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
	const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(buffer, 0, range);
	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	DescriptorSetUpdateBuilder()
		.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
		.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
		.update(deviceInterface, device);

	// First use of the image: transition UNDEFINED -> GENERAL for the write.
	const VkImageMemoryBarrier imageBarrierPre = makeImageMemoryBarrier(0,
																		VK_ACCESS_SHADER_WRITE_BIT,
																		VK_IMAGE_LAYOUT_UNDEFINED,
																		VK_IMAGE_LAYOUT_GENERAL,
																		m_resultImage->get(),

	deviceInterface.cmdPipelineBarrier( cmdBuffer,
										VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										(VkDependencyFlags)0,
										0, (const VkMemoryBarrier*)DE_NULL,
										0, (const VkBufferMemoryBarrier*)DE_NULL,
										1, &imageBarrierPre);

	deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
	deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);

	deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());

	// Make the fill writes visible to the following atomic compute dispatch.
	const VkImageMemoryBarrier imageBarrierPost = makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
																		 VK_ACCESS_SHADER_READ_BIT,
																		 VK_IMAGE_LAYOUT_GENERAL,
																		 VK_IMAGE_LAYOUT_GENERAL,
																		 m_resultImage->get(),

	deviceInterface.cmdPipelineBarrier( cmdBuffer,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										(VkDependencyFlags)0,
										0, (const VkMemoryBarrier*)DE_NULL,
										0, (const VkBufferMemoryBarrier*)DE_NULL,
										1, &imageBarrierPost);
// Creates the image used by the test plus a full-range color view. Cube and
// cube-array images get the CUBE_COMPATIBLE flag. When the backing type is
// SPARSE (non-VulkanSC builds), a sparse resident image is created instead
// and its bind semaphore is queued in m_waitSemaphores for submit time.
void BinaryAtomicInstanceBase::createImageAndView (VkFormat imageFormat,
												   const tcu::UVec3& imageExent,
												   de::MovePtr<Image>& imagePtr,
												   Move<VkImageView>& imageViewPtr)
	const VkDevice device = m_context.getDevice();
	const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
	Allocator& allocator = m_context.getDefaultAllocator();
	const VkImageUsageFlags usageFlags = getUsageFlags(useTransfer);
	VkImageCreateFlags createFlags = 0u;

	// Cube views require the image to be created cube-compatible.
	if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
		createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;

	const auto numLayers = getNumLayers(m_imageType, m_imageSize);

	VkImageCreateInfo createInfo =
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType			sType;
		DE_NULL,								// const void*				pNext;
		createFlags,							// VkImageCreateFlags		flags;
		mapImageType(m_imageType),				// VkImageType				imageType;
		imageFormat,							// VkFormat					format;
		makeExtent3D(imageExent),				// VkExtent3D				extent;
		1u,										// deUint32					mipLevels;
		numLayers,								// deUint32					arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
		m_tiling,								// VkImageTiling			tiling;
		usageFlags,								// VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
		0u,										// deUint32					queueFamilyIndexCount;
		DE_NULL,								// const deUint32*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED,				// VkImageLayout			initialLayout;

#ifndef CTS_USES_VULKANSC
	if (m_backingType == ImageBackingType::SPARSE)
		const auto& vki = m_context.getInstanceInterface();
		const auto physicalDevice = m_context.getPhysicalDevice();
		const auto sparseQueue = m_context.getSparseQueue();
		const auto sparseQueueIdx = m_context.getSparseQueueFamilyIndex();
		const auto universalQIdx = m_context.getUniversalQueueFamilyIndex();
		const deUint32 queueIndices[] = { universalQIdx, sparseQueueIdx };

		createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);

		// Sparse binding may run on a different queue family: share the image
		// between the universal and sparse queues in that case.
		if (sparseQueueIdx != universalQIdx)
			createInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
			createInfo.queueFamilyIndexCount = static_cast<deUint32>(DE_LENGTH_OF_ARRAY(queueIndices));
			createInfo.pQueueFamilyIndices = queueIndices;

		const auto sparseImage = new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
		// The submit must wait for the sparse bind to complete.
		m_waitSemaphores.push_back(sparseImage->getSemaphore());
		imagePtr = de::MovePtr<Image>(sparseImage);
#endif // CTS_USES_VULKANSC
	imagePtr = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));

	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);

	imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat, subresourceRange);
1327 void BinaryAtomicInstanceBase::createImageResources (const VkFormat& imageFormat,
1328 const bool useTransfer)
1330 //Create the image that is going to store results of atomic operations
1331 createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage, m_resultImageView);
// Variant that verifies the FINAL contents of the image after all atomic
// invocations have completed.
class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase

	BinaryAtomicEndResultInstance (Context& context,
								   const ImageType imageType,
								   const tcu::UVec3& imageSize,
								   const TextureFormat& format,
								   const VkImageTiling tiling,
								   const AtomicOperation operation,
								   const bool useTransfer,
								   const ShaderReadType shaderReadType,
								   const ImageBackingType backingType)
		: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer, shaderReadType, backingType) {}

	// One texel per pixel of the result image.
	virtual deUint32 getOutputBufferSize (void) const;

	// No extra resources needed for the end-result variant.
	virtual void prepareResources (const bool useTransfer) { DE_UNREF(useTransfer); }
	virtual void prepareDescriptors (const bool isTexelBuffer);

	virtual void commandsBeforeCompute (const VkCommandBuffer) const {}
	virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
									   const VkPipeline pipeline,
									   const VkPipelineLayout pipelineLayout,
									   const VkDescriptorSet descriptorSet,
									   const VkDeviceSize& range,
									   const bool useTransfer);

	virtual bool verifyResult (Allocation& outputBufferAllocation,
							   const bool is64Bit) const;

	// Recomputes the expected end value for a pixel from the per-invocation
	// arguments and compares it with the shader result.
	template <typename T>
	bool isValueCorrect (const T resultValue,
						 const UVec3& gridSize,
						 const IVec3 extendedGridSize) const;
1377 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
1379 return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
// Sets up the descriptor objects for the atomic shader (a single binding).
// For texel buffers a storage texel buffer view of the input buffer is bound,
// so the atomics act directly on the buffer; otherwise the result image is
// bound as a storage image.
void BinaryAtomicEndResultInstance::prepareDescriptors (const bool isTexelBuffer)
	const VkDescriptorType descriptorType = isTexelBuffer ?
											VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
											VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
	const VkDevice device = m_context.getDevice();
	const DeviceInterface& deviceInterface = m_context.getDeviceInterface();

	m_descriptorSetLayout =
		DescriptorSetLayoutBuilder()
		.addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(deviceInterface, device);

	DescriptorPoolBuilder()
	.addType(descriptorType)
	.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);

	// Texel-buffer path: bind a buffer view over the input buffer.
	m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);

	DescriptorSetUpdateBuilder()
		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
		.update(deviceInterface, device);

	// Image path: bind the result image view in GENERAL layout.
	const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);

	DescriptorSetUpdateBuilder()
		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
		.update(deviceInterface, device);
// Readback of the final image contents. Three paths:
//  - texel buffer: the atomics already ran on m_inputBuffer, so reuse it,
//  - transfer: barrier + copy of the result image into the output buffer,
//  - otherwise: dispatch the read-back compute pipeline into the buffer.
void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer,
														  const VkPipeline pipeline,
														  const VkPipelineLayout pipelineLayout,
														  const VkDescriptorSet descriptorSet,
														  const VkDeviceSize& range,
														  const bool useTransfer)
	const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
	const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);

	if (m_imageType == IMAGE_TYPE_BUFFER)
		// Ownership of the input buffer (the atomic target) moves to the
		// output slot; no GPU copy is required.
		m_outputBuffer = m_inputBuffer;
	else if (useTransfer)
		// Wait for shader writes, then transition for the transfer read.
		const VkImageMemoryBarrier resultImagePostDispatchBarrier =
			makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
									VK_ACCESS_TRANSFER_READ_BIT,
									VK_IMAGE_LAYOUT_GENERAL,
									VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
									m_resultImage->get(),

		deviceInterface.cmdPipelineBarrier( cmdBuffer,
											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
											VK_PIPELINE_STAGE_TRANSFER_BIT,
											DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
											1u, &resultImagePostDispatchBarrier);

		const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));

		deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);

		// No-transfer path: read the image into the output buffer with the
		// read-back compute shader (image at binding 0, buffer at binding 1).
		const VkDevice device = m_context.getDevice();
		const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
		const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);

		DescriptorSetUpdateBuilder()
			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
			.update(deviceInterface, device);

		// Make the atomic writes visible to the read-back dispatch.
		const VkImageMemoryBarrier resultImagePostDispatchBarrier =
			makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
									VK_ACCESS_SHADER_READ_BIT,
									VK_IMAGE_LAYOUT_GENERAL,
									VK_IMAGE_LAYOUT_GENERAL,
									m_resultImage->get(),

		deviceInterface.cmdPipelineBarrier( cmdBuffer,
											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
											DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
											1u, &resultImagePostDispatchBarrier);

		deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
		deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);

		// Arrayed image types dispatch one Z (or Y) slice per layer.
		switch (m_imageType)
			case IMAGE_TYPE_1D_ARRAY:
				deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
			case IMAGE_TYPE_2D_ARRAY:
			case IMAGE_TYPE_CUBE:
			case IMAGE_TYPE_CUBE_ARRAY:
				deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
				deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
// Host-side verification of the final image contents. For order-independent
// operations (add, min, max, and, or, xor) the exact expected value is
// recomputed per pixel; for exchange/compare-exchange the result only has to
// match the argument of ONE of the invocations that touched the pixel.
bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation,
												  const bool is64Bit) const
	const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
	// The shader ran on an x-extended grid: NUM_INVOCATIONS_PER_PIXEL
	// invocations mapped onto each output pixel.
	const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());

	tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());

	for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
	for (deInt32 x = 0; x < resultBuffer.getWidth(); x++)
		const void* resultValue = resultBuffer.getPixelPtr(x, y, z);
		deInt32 floatToIntValue = 0;
		bool isFloatValue = false;
		// Float results are compared through their truncated integer value.
		if (isFloatFormat(mapTextureFormat(m_format)))
			isFloatValue = true;
			floatToIntValue = static_cast<deInt32>(*((float*)resultValue));

		if (isOrderIndependentAtomicOperation(m_operation))
			// Pick the host type matching the image format and recompute the
			// exact expected end value.
			if (isUintFormat(mapTextureFormat(m_format)))
				if (!isValueCorrect<deUint64>(*((deUint64*)resultValue), x, y, z, gridSize, extendedGridSize))
				if (!isValueCorrect<deUint32>(*((deUint32*)resultValue), x, y, z, gridSize, extendedGridSize))
			else if (isIntFormat(mapTextureFormat(m_format)))
				if (!isValueCorrect<deInt64>(*((deInt64*)resultValue), x, y, z, gridSize, extendedGridSize))
				if (!isValueCorrect<deInt32>(*((deInt32*)resultValue), x, y, z, gridSize, extendedGridSize))
			// 32-bit floating point
			if (!isValueCorrect<deInt32>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
		else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
			// Check if the end result equals one of the atomic args.
			bool matchFound = false;

			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
				// gid is the extended-grid coordinate of invocation i.
				const IVec3 gid(x + i*gridSize.x(), y, z);
				matchFound = is64Bit ?
					(*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
					floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
					(*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
		else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
			// Check if the end result equals one of the atomic args.
			bool matchFound = false;

			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
				const IVec3 gid(x + i*gridSize.x(), y, z);
				matchFound = is64Bit ?
					(*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
					floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
					(*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1599 template <typename T>
1600 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1602 T reference = getOperationInitialValue<T>(m_operation);
1603 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1605 const IVec3 gid(x + i*gridSize.x(), y, z);
1606 T arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1607 reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1609 return (resultValue == reference);
1612 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
1614 return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation, m_useTransfer, m_readType, m_backingType);
// Variant that captures the INTERMEDIATE value each atomic invocation
// observed (the value returned by the atomic op) into a widened image and
// verifies that the whole set of observations is consistent with some legal
// ordering of the invocations.
class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase

	BinaryAtomicIntermValuesInstance (Context& context,
									  const ImageType imageType,
									  const tcu::UVec3& imageSize,
									  const TextureFormat& format,
									  const VkImageTiling tiling,
									  const AtomicOperation operation,
									  const bool useTransfer,
									  const ShaderReadType shaderReadType,
									  const ImageBackingType backingType)
		: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer, shaderReadType, backingType) {}

	// NUM_INVOCATIONS_PER_PIXEL texels per pixel of the result image.
	virtual deUint32 getOutputBufferSize (void) const;

	// Creates the widened intermediate-results image.
	virtual void prepareResources (const bool useTransfer);
	virtual void prepareDescriptors (const bool isTexelBuffer);

	virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const;
	virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
									   const VkPipeline pipeline,
									   const VkPipelineLayout pipelineLayout,
									   const VkDescriptorSet descriptorSet,
									   const VkDeviceSize& range,
									   const bool useTransfer);

	virtual bool verifyResult (Allocation& outputBufferAllocation,
							   const bool is64Bit) const;

	// Checks the intermediate values of one pixel against all invocations.
	template <typename T>
	bool areValuesCorrect (tcu::ConstPixelBufferAccess& resultBuffer,
						   const bool isFloatingPoint,
						   const UVec3& gridSize,
						   const IVec3 extendedGridSize) const;

	// Recursively searches for an invocation ordering that reproduces the
	// observed per-invocation result values.
	template <typename T>
	bool verifyRecursive (const deInt32 index,
						  bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
						  const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
						  const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;

	// Widened image receiving one observed value per invocation.
	de::MovePtr<Image> m_intermResultsImage;
	Move<VkImageView> m_intermResultsImageView;
1670 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
1672 return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1675 void BinaryAtomicIntermValuesInstance::prepareResources (const bool useTransfer)
1677 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1678 const bool isCubeBasedImage = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
1679 const UVec3 extendedLayerSize = isCubeBasedImage ? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
1680 : UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1682 createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage, m_intermResultsImageView);
// Sets up the two-binding descriptor set for the atomic shader: binding 0 is
// the atomic target (result image / input texel buffer) and binding 1 is the
// intermediate-values destination (widened image / output texel buffer).
void BinaryAtomicIntermValuesInstance::prepareDescriptors (const bool isTexelBuffer)
	const VkDescriptorType descriptorType = isTexelBuffer ?
											VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
											VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;

	const VkDevice device = m_context.getDevice();
	const DeviceInterface& deviceInterface = m_context.getDeviceInterface();

	m_descriptorSetLayout =
		DescriptorSetLayoutBuilder()
		.addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
		.addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(deviceInterface, device);

	DescriptorPoolBuilder()
	.addType(descriptorType, 2u)
	.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);

	// Texel-buffer path: bind views over the input and output buffers.
	m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
	m_descIntermResultsBufferView = makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);

	DescriptorSetUpdateBuilder()
		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &(m_descIntermResultsBufferView.get()))
		.update(deviceInterface, device);

	// Image path: bind both storage images in GENERAL layout.
	const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
	const VkDescriptorImageInfo descIntermResultsImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);

	DescriptorSetUpdateBuilder()
		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &descIntermResultsImageInfo)
		.update(deviceInterface, device);
// Transitions the intermediate-results image UNDEFINED -> GENERAL so the
// atomic compute shader can write the per-invocation values into it.
void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
	const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	const VkImageMemoryBarrier imagePreDispatchBarrier =
		makeImageMemoryBarrier( 0u,
								VK_ACCESS_SHADER_WRITE_BIT,
								VK_IMAGE_LAYOUT_UNDEFINED,
								VK_IMAGE_LAYOUT_GENERAL,
								m_intermResultsImage->get(),

	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
// Readback of the intermediate-values image. For texel buffers the shader
// already wrote into the output buffer; for the transfer path the widened
// image is copied into the buffer; otherwise the read-back compute pipeline
// is dispatched over the extended grid.
void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer,
															 const VkPipeline pipeline,
															 const VkPipelineLayout pipelineLayout,
															 const VkDescriptorSet descriptorSet,
															 const VkDeviceSize& range,
															 const bool useTransfer)
	// nothing is needed for texel image buffer
	if (m_imageType == IMAGE_TYPE_BUFFER)

	const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
	const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);

	// Transfer path: wait for shader writes, transition, copy to buffer.
	const VkImageMemoryBarrier imagePostDispatchBarrier =
		makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
								VK_ACCESS_TRANSFER_READ_BIT,
								VK_IMAGE_LAYOUT_GENERAL,
								VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
								m_intermResultsImage->get(),

	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);

	// The copy covers the x-extended (widened) image extent.
	const UVec3 extendedLayerSize = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
	const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));

	deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);

	// No-transfer path: read the widened image back with the read-back
	// compute shader (image at binding 0, buffer at binding 1).
	const VkDevice device = m_context.getDevice();
	const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
	const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);

	DescriptorSetUpdateBuilder()
		.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
		.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
		.update(deviceInterface, device);

	// Make the atomic shader's writes visible to the read-back dispatch.
	const VkImageMemoryBarrier resultImagePostDispatchBarrier =
		makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
								VK_ACCESS_SHADER_READ_BIT,
								VK_IMAGE_LAYOUT_GENERAL,
								VK_IMAGE_LAYOUT_GENERAL,
								m_intermResultsImage->get(),

	deviceInterface.cmdPipelineBarrier( cmdBuffer,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
										1u, &resultImagePostDispatchBarrier);

	deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
	deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);

	// Arrayed image types dispatch one slice per layer; the x dimension is
	// always extended by the invocation count.
	switch (m_imageType)
		case IMAGE_TYPE_1D_ARRAY:
			deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), subresourceRange.layerCount, layerSize.z());
		case IMAGE_TYPE_2D_ARRAY:
		case IMAGE_TYPE_CUBE:
		case IMAGE_TYPE_CUBE_ARRAY:
			deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), subresourceRange.layerCount);
			deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1822 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation& outputBufferAllocation,
1823 const bool is64Bit) const
// Validates the intermediate atomic return values read back from the output
// buffer. The buffer is viewed as a pixel buffer whose width is the shader
// grid width times NUM_INVOCATIONS_PER_PIXEL: each source pixel owns one
// column slot per invocation that performed an atomic on it.
1825 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1826 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1828 tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
// Iterate the ORIGINAL grid in x (not the extended width): areValuesCorrect
// gathers the NUM_INVOCATIONS_PER_PIXEL samples for pixel (x, y, z) itself.
1830 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1831 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1832 for (deUint32 x = 0; x < gridSize.x(); x++)
// Pick the element type from the texture format; is64Bit presumably selects
// the wider instantiation within each signedness branch — TODO(review):
// confirm, the if (is64Bit) lines appear stripped from this extraction.
1834 if (isUintFormat(mapTextureFormat(m_format)))
1838 if (!areValuesCorrect<deUint64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1843 if (!areValuesCorrect<deUint32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1847 else if (isIntFormat(mapTextureFormat(m_format)))
1851 if (!areValuesCorrect<deInt64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1856 if (!areValuesCorrect<deInt32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
// Float results are reinterpreted via the deInt32 instantiation with the
// isFloatingPoint flag set (see areValuesCorrect's deMemcpy round-trip).
1862 // 32-bit floating point
1863 if (!areValuesCorrect<deInt32>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
1871 template <typename T>
// Checks one pixel's worth of intermediate atomic return values. Sample i
// for pixel (x, y, z) lives at x + i*gridSize.x() in the extended buffer.
// For each sample it also reconstructs the argument the shader passed to the
// atomic (getAtomicFuncArgument), then asks verifyRecursive whether some
// ordering of the invocations produces exactly the observed return values.
1872 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess& resultBuffer, const bool isFloatingPoint, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1874 T resultValues[NUM_INVOCATIONS_PER_PIXEL];
1875 T atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1876 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1878 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1880 IVec3 gid(x + i*gridSize.x(), y, z);
1881 T data = *((T*)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
// Float images store IEEE-754 bits in the integer pixel; reinterpret them
// as float and round-trip through T so comparisons use the float value.
// NOTE(review): fData's declaration line appears stripped from this
// extraction — presumably a float local; confirm against the full file.
1882 if (isFloatingPoint)
1885 deMemcpy(&fData, &data, sizeof(fData));
1886 data = static_cast<T>(fData);
1888 resultValues[i] = data;
1889 atomicArgs[i] = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1890 argsUsed[i] = false;
1893 // Verify that the return values form a valid sequence.
1894 return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
1897 template <typename T>
// Backtracking search over invocation orderings. At each depth it looks for
// an unused invocation whose recorded return value equals the value the
// atomic location would hold at that point (valueSoFar), then recurses with
// the value after applying that invocation's operation. Succeeds when all
// NUM_INVOCATIONS_PER_PIXEL invocations have been placed in a consistent
// order. NOTE(review): the argsUsed[i] = true marking and the return
// true/false statements appear stripped from this extraction; L1124's
// argsUsed[i] = false is the backtracking reset after a failed branch.
1898 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32 index,
1900 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1901 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1902 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
// Base case: every invocation placed — the observed sequence is valid.
1904 if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
1907 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1909 if (!argsUsed[i] && resultValues[i] == valueSoFar)
1913 if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
1918 argsUsed[i] = false;
// Factory: hands the case's full configuration (image type/size/format,
// tiling, atomic operation, transfer/read/backing modes) to a fresh
// intermediate-values test instance. Caller owns the returned pointer.
1925 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
1927 return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation, m_useTransfer, m_readType, m_backingType);
// Builds the full "atomic_operations" test tree. Hierarchy, from the loops
// below: operation / image type / [no]transfer / read type / backing type,
// with leaf cases per format and tiling (end-result and intermediate-values
// variants of each).
1932 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
1934 de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
// Local helper pairing an image type with the size used for its cases.
// NOTE(review): the enclosing struct declaration line appears stripped from
// this extraction.
1938 ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
1939 : m_imageType (imageType)
1940 , m_imageSize (imageSize)
1943 const ImageType m_imageType;
1944 const tcu::UVec3 m_imageSize;
1947 const ImageParams imageParamsArray[] =
1949 ImageParams(IMAGE_TYPE_1D, tcu::UVec3(64u, 1u, 1u)),
1950 ImageParams(IMAGE_TYPE_1D_ARRAY, tcu::UVec3(64u, 1u, 8u)),
1951 ImageParams(IMAGE_TYPE_2D, tcu::UVec3(64u, 64u, 1u)),
1952 ImageParams(IMAGE_TYPE_2D_ARRAY, tcu::UVec3(64u, 64u, 8u)),
1953 ImageParams(IMAGE_TYPE_3D, tcu::UVec3(48u, 48u, 8u)),
1954 ImageParams(IMAGE_TYPE_CUBE, tcu::UVec3(64u, 64u, 1u)),
1955 ImageParams(IMAGE_TYPE_CUBE_ARRAY, tcu::UVec3(64u, 64u, 2u)),
1956 ImageParams(IMAGE_TYPE_BUFFER, tcu::UVec3(64u, 1u, 1u))
// Tested texel formats: 32/64-bit signed/unsigned integers plus 32-bit float.
1959 const tcu::TextureFormat formats[] =
1961 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1962 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1963 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1964 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1965 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)
1968 static const VkImageTiling s_tilings[] = {
1969 VK_IMAGE_TILING_OPTIMAL,
1970 VK_IMAGE_TILING_LINEAR,
// Shader read modes (sparse reads excluded from Vulkan SC builds).
// NOTE(review): the struct/array declaration lines around these entries
// appear stripped from this extraction.
1975 ShaderReadType type;
1979 { ShaderReadType::NORMAL, "normal_read" },
1980 #ifndef CTS_USES_VULKANSC
1981 { ShaderReadType::SPARSE, "sparse_read" },
1982 #endif // CTS_USES_VULKANSC
// Image backing modes (sparse backing likewise excluded from Vulkan SC).
1987 ImageBackingType type;
1991 { ImageBackingType::NORMAL, "normal_img" },
1992 #ifndef CTS_USES_VULKANSC
1993 { ImageBackingType::SPARSE, "sparse_img" },
1994 #endif // CTS_USES_VULKANSC
1997 for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
1999 const AtomicOperation operation = (AtomicOperation)operationI;
2001 de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
2003 for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
2005 const ImageType imageType = imageParamsArray[imageTypeNdx].m_imageType;
2006 const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
2008 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
2010 for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
2012 const bool useTransfer = (useTransferIdx > 0);
// Group name is "notransfer" or "transfer" depending on the mode.
2013 const string groupName = (!useTransfer ? "no" : "") + string("transfer")
2015 de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str(), ""));
2017 for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
2019 const auto& readType = readTypes[readTypeIdx];
2021 de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name, ""));
2023 for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
2025 const auto& backingType = backingTypes[backingTypeIdx];
2027 de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(new tcu::TestCaseGroup(testCtx, backingType.name, ""));
2029 for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
2031 for (int tilingNdx = 0; tilingNdx < DE_LENGTH_OF_ARRAY(s_tilings); tilingNdx++)
2033 const TextureFormat& format = formats[formatNdx];
2034 const std::string formatName = getShaderImageFormatQualifier(format);
// Linear-tiling cases get a "_linear" suffix on their names.
2035 const char* suffix = (s_tilings[tilingNdx] == VK_IMAGE_TILING_OPTIMAL) ? "" : "_linear";
// The following guards skip unsupported combinations; their continue;
// statements appear stripped from this extraction.
2037 // Need SPIRV programs in vktImageAtomicSpirvShaders.cpp
2038 if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
2043 // Only 2D and 3D images may support sparse residency.
2044 // VK_IMAGE_TILING_LINEAR does not support sparse residency
2045 const auto vkImageType = mapImageType(imageType);
2046 if (backingType.type == ImageBackingType::SPARSE && ((vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D) || (s_tilings[tilingNdx] == VK_IMAGE_TILING_LINEAR)))
2049 // Only some operations are supported on floating-point
2050 if (format.type == tcu::TextureFormat::FLOAT)
2052 if (operation != ATOMIC_OPERATION_ADD &&
2053 #ifndef CTS_USES_VULKANSC
2054 operation != ATOMIC_OPERATION_MIN &&
2055 operation != ATOMIC_OPERATION_MAX &&
2056 #endif // CTS_USES_VULKANSC
2057 operation != ATOMIC_OPERATION_EXCHANGE)
2063 if (readType.type == ShaderReadType::SPARSE)
2065 // When using transfer, shader reads will not be used, so avoid creating two identical cases.
2069 // Sparse reads are not supported for all types of images.
2070 if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY || imageType == IMAGE_TYPE_BUFFER)
2074 //!< Atomic case checks the end result of the operations, and not the intermediate return values
2075 const string caseEndResult = formatName + "_end_result" + suffix;
2076 backingTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, s_tilings[tilingNdx], operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2078 //!< Atomic case checks the return values of the atomic function and not the end result.
2079 const string caseIntermValues = formatName + "_intermediate_values" + suffix;
2080 backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, s_tilings[tilingNdx], operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2084 readTypeGroup->addChild(backingTypeGroup.release());
2087 transferGroup->addChild(readTypeGroup.release());
2090 imageTypeGroup->addChild(transferGroup.release());
2093 operationGroup->addChild(imageTypeGroup.release());
2096 imageAtomicOperationsTests->addChild(operationGroup.release());
// Ownership of the whole tree transfers to the caller.
2099 return imageAtomicOperationsTests.release();