1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2016 The Khronos Group Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 * \file vktImageAtomicOperationTests.cpp
21 * \brief Image atomic operation tests
22 *//*--------------------------------------------------------------------*/
24 #include "vktImageAtomicOperationTests.hpp"
25 #include "vktImageAtomicSpirvShaders.hpp"
27 #include "deUniquePtr.hpp"
28 #include "deStringUtil.hpp"
29 #include "deSTLUtil.hpp"
31 #include "vktTestCaseUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkQueryUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vktImageTestsUtil.hpp"
37 #include "vkBuilderUtil.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkTypeUtil.hpp"
41 #include "vkCmdUtil.hpp"
42 #include "vkObjUtil.hpp"
44 #include "tcuTextureUtil.hpp"
45 #include "tcuTexture.hpp"
46 #include "tcuVectorType.hpp"
47 #include "tcuStringTemplate.hpp"
60 using tcu::TextureFormat;
71 using tcu::Texture2DArray;
72 using tcu::TextureCube;
73 using tcu::PixelBufferAccess;
74 using tcu::ConstPixelBufferAccess;
76 using tcu::TestContext;
80 NUM_INVOCATIONS_PER_PIXEL = 5u
85 ATOMIC_OPERATION_ADD = 0,
94 ATOMIC_OPERATION_EXCHANGE,
95 ATOMIC_OPERATION_COMPARE_EXCHANGE,
100 enum class ShaderReadType
106 enum class ImageBackingType
//! Builds the GLSL coordinate expression used to address the test image:
//! ivec2 for 1D-array images, ivec3 for 2D-array/cube/cube-array images.
//! NOTE(review): the listing is missing lines here (switch header and the
//! plain 1D/2D/3D cases) — confirm against the full source before editing.
112 static string getCoordStr (const ImageType imageType,
113 const std::string& x,
114 const std::string& y,
115 const std::string& z)
120 case IMAGE_TYPE_BUFFER:
122 case IMAGE_TYPE_1D_ARRAY:
124 return string("ivec2(" + x + "," + y + ")");
125 case IMAGE_TYPE_2D_ARRAY:
127 case IMAGE_TYPE_CUBE:
128 case IMAGE_TYPE_CUBE_ARRAY:
129 return string("ivec3(" + x + "," + y + "," + z + ")");
//! Returns the GLSL scalar type name matching the component width (32 or 64
//! bits) and the format class (signed int / unsigned int / float).
//! Exactly one of intFormat/uintFormat/floatFormat must be set.
136 static string getComponentTypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
138 DE_ASSERT(intFormat || uintFormat || floatFormat);
140 const bool is64 = (componentWidth == 64);
143 return (is64 ? "int64_t" : "int");
145 return (is64 ? "uint64_t" : "uint");
147 return (is64 ? "double" : "float");
//! Returns the GLSL 4-component vector type name matching the component
//! width and format class (e.g. "ivec4", "u64vec4", "vec4").
152 static string getVec4TypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
154 DE_ASSERT(intFormat || uintFormat || floatFormat);
156 const bool is64 = (componentWidth == 64);
159 return (is64 ? "i64vec4" : "ivec4");
161 return (is64 ? "u64vec4" : "uvec4");
163 return (is64 ? "f64vec4" : "vec4");
//! Builds the GLSL expression used as the data argument of the atomic call.
//! Must stay in sync with the host-side getAtomicFuncArgument() below, which
//! computes the same per-invocation value for result verification.
168 static string getAtomicFuncArgumentShaderStr (const AtomicOperation op,
172 const IVec3& gridSize)
176 case ATOMIC_OPERATION_ADD:
177 case ATOMIC_OPERATION_AND:
178 case ATOMIC_OPERATION_OR:
179 case ATOMIC_OPERATION_XOR:
180 return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
181 case ATOMIC_OPERATION_MIN:
182 case ATOMIC_OPERATION_MAX:
// Sign-flip half the invocations so min/max see both signs; for uint
// formats the negation wraps around and produces large values instead.
183 // multiply by (1-2*(value % 2) to make half of the data negative
184 // this will result in generating large numbers for uint formats
185 return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
186 case ATOMIC_OPERATION_EXCHANGE:
// Unique linear index per invocation so every exchanged value is distinct.
187 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
188 return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
//! Maps an AtomicOperation to the test-case name used in the test hierarchy.
195 static string getAtomicOperationCaseName (const AtomicOperation op)
199 case ATOMIC_OPERATION_ADD: return string("add");
200 case ATOMIC_OPERATION_SUB: return string("sub");
201 case ATOMIC_OPERATION_INC: return string("inc");
202 case ATOMIC_OPERATION_DEC: return string("dec");
203 case ATOMIC_OPERATION_MIN: return string("min");
204 case ATOMIC_OPERATION_MAX: return string("max");
205 case ATOMIC_OPERATION_AND: return string("and");
206 case ATOMIC_OPERATION_OR: return string("or");
207 case ATOMIC_OPERATION_XOR: return string("xor");
208 case ATOMIC_OPERATION_EXCHANGE: return string("exchange");
209 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("compare_exchange");
//! Maps an AtomicOperation to the GLSL imageAtomic* built-in function name.
//! SUB/INC/DEC have no GLSL built-in; those cases use SPIR-V shaders instead
//! (see isSpirvAtomicOperation) and are intentionally absent here.
216 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
220 case ATOMIC_OPERATION_ADD: return string("imageAtomicAdd");
221 case ATOMIC_OPERATION_MIN: return string("imageAtomicMin");
222 case ATOMIC_OPERATION_MAX: return string("imageAtomicMax");
223 case ATOMIC_OPERATION_AND: return string("imageAtomicAnd");
224 case ATOMIC_OPERATION_OR: return string("imageAtomicOr");
225 case ATOMIC_OPERATION_XOR: return string("imageAtomicXor");
226 case ATOMIC_OPERATION_EXCHANGE: return string("imageAtomicExchange");
227 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("imageAtomicCompSwap");
//! Initial texel value the image is filled with before the atomic pass, per
//! operation (32-bit path; 64-bit formats use the deInt64 specialization).
//! Values are chosen so the operation's effect is observable without
//! overflowing: large for SUB/DEC, all-low-bits-set for MIN/AND, small for
//! the rest.
234 template <typename T>
235 T getOperationInitialValue (const AtomicOperation op)
239 // \note 18 is just an arbitrary small nonzero value.
240 case ATOMIC_OPERATION_ADD: return 18;
241 case ATOMIC_OPERATION_INC: return 18;
242 case ATOMIC_OPERATION_SUB: return (1 << 24) - 1;
243 case ATOMIC_OPERATION_DEC: return (1 << 24) - 1;
244 case ATOMIC_OPERATION_MIN: return (1 << 15) - 1;
245 case ATOMIC_OPERATION_MAX: return 18;
246 case ATOMIC_OPERATION_AND: return (1 << 15) - 1;
247 case ATOMIC_OPERATION_OR: return 18;
248 case ATOMIC_OPERATION_XOR: return 18;
249 case ATOMIC_OPERATION_EXCHANGE: return 18;
250 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 18;
//! 64-bit specialization of getOperationInitialValue; mirrors the 32-bit
//! table with wider constants so upper bits of the 64-bit texel are
//! exercised. The COMPARE_EXCHANGE comparand in the shaders must match
//! 0x000000BEFFFFFF18 (see computeBinaryAtomicOperationResult).
258 deInt64 getOperationInitialValue<deInt64>(const AtomicOperation op)
262 // \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
263 case ATOMIC_OPERATION_ADD: return 0x000000BEFFFFFF18;
264 case ATOMIC_OPERATION_INC: return 0x000000BEFFFFFF18;
265 case ATOMIC_OPERATION_SUB: return (1ull << 56) - 1;
266 case ATOMIC_OPERATION_DEC: return (1ull << 56) - 1;
267 case ATOMIC_OPERATION_MIN: return (1ull << 47) - 1;
268 case ATOMIC_OPERATION_MAX: return 0x000000BEFFFFFF18;
269 case ATOMIC_OPERATION_AND: return (1ull << 47) - 1;
270 case ATOMIC_OPERATION_OR: return 0x000000BEFFFFFF18;
271 case ATOMIC_OPERATION_XOR: return 0x000000BEFFFFFF18;
272 case ATOMIC_OPERATION_EXCHANGE: return 0x000000BEFFFFFF18;
273 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 0x000000BEFFFFFF18;
// Unreachable fallback for unhandled enum values.
276 return 0xFFFFFFFFFFFFFFFF;
//! Unsigned 64-bit specialization: reuses the signed table bit-for-bit.
281 deUint64 getOperationInitialValue<deUint64>(const AtomicOperation op)
283 return (deUint64)getOperationInitialValue<deInt64>(op);
//! Host-side mirror of getAtomicFuncArgumentShaderStr(): computes, for one
//! shader invocation, the value that invocation passes to the atomic call.
//! Used by the verifiers to reproduce the expected image contents.
287 template <typename T>
288 static T getAtomicFuncArgument (const AtomicOperation op,
289 const IVec3& invocationID,
290 const IVec3& gridSize)
292 const T x = static_cast<T>(invocationID.x());
293 const T y = static_cast<T>(invocationID.y());
294 const T z = static_cast<T>(invocationID.z());
298 // \note Fall-throughs.
299 case ATOMIC_OPERATION_ADD:
300 case ATOMIC_OPERATION_SUB:
301 case ATOMIC_OPERATION_AND:
302 case ATOMIC_OPERATION_OR:
303 case ATOMIC_OPERATION_XOR:
304 return x*x + y*y + z*z;
305 case ATOMIC_OPERATION_INC:
306 case ATOMIC_OPERATION_DEC:
308 case ATOMIC_OPERATION_MIN:
309 case ATOMIC_OPERATION_MAX:
310 // multiply half of the data by -1
311 return (1-2*(x % 2))*(x*x + y*y + z*z);
// Unique linear index per invocation (matches the shader-side expression).
312 case ATOMIC_OPERATION_EXCHANGE:
313 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
314 return (z*static_cast<T>(gridSize.x()) + x)*static_cast<T>(gridSize.y()) + y;
321 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
// Everything except EXCHANGE / COMPARE_EXCHANGE qualifies: those two leave
// the last-written value, so the outcome depends on execution order.
322 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
324 return op == ATOMIC_OPERATION_ADD ||
325 op == ATOMIC_OPERATION_SUB ||
326 op == ATOMIC_OPERATION_INC ||
327 op == ATOMIC_OPERATION_DEC ||
328 op == ATOMIC_OPERATION_MIN ||
329 op == ATOMIC_OPERATION_MAX ||
330 op == ATOMIC_OPERATION_AND ||
331 op == ATOMIC_OPERATION_OR ||
332 op == ATOMIC_OPERATION_XOR;
335 //! Checks if the operation needs an SPIR-V shader.
// SUB/INC/DEC have no GLSL imageAtomic* built-in, so these cases are
// implemented with hand-written SPIR-V assembly instead of GLSL.
336 static bool isSpirvAtomicOperation (const AtomicOperation op)
338 return op == ATOMIC_OPERATION_SUB ||
339 op == ATOMIC_OPERATION_INC ||
340 op == ATOMIC_OPERATION_DEC;
343 //! Returns the SPIR-V assembler name of the given operation.
// Only valid for operations where isSpirvAtomicOperation() is true.
344 static std::string getSpirvAtomicOpName (const AtomicOperation op)
348 case ATOMIC_OPERATION_SUB: return "OpAtomicISub";
349 case ATOMIC_OPERATION_INC: return "OpAtomicIIncrement";
350 case ATOMIC_OPERATION_DEC: return "OpAtomicIDecrement";
358 //! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
// OpAtomicIIncrement/OpAtomicIDecrement take no value operand; OpAtomicISub
// keeps the same operand list as OpAtomicIAdd.
359 static bool isSpirvAtomicNoLastArgOp (const AtomicOperation op)
363 case ATOMIC_OPERATION_SUB: return false;
364 case ATOMIC_OPERATION_INC: // fallthrough
365 case ATOMIC_OPERATION_DEC: return true;
373 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
// Reference implementation used by the verifiers; INC/DEC reuse ADD/SUB with
// the caller supplying b accordingly.
374 template <typename T>
375 static T computeBinaryAtomicOperationResult (const AtomicOperation op, const T a, const T b)
379 case ATOMIC_OPERATION_INC: // fallthrough.
380 case ATOMIC_OPERATION_ADD: return a + b;
381 case ATOMIC_OPERATION_DEC: // fallthrough.
382 case ATOMIC_OPERATION_SUB: return a - b;
383 case ATOMIC_OPERATION_MIN: return de::min(a, b);
384 case ATOMIC_OPERATION_MAX: return de::max(a, b);
385 case ATOMIC_OPERATION_AND: return a & b;
386 case ATOMIC_OPERATION_OR: return a | b;
387 case ATOMIC_OPERATION_XOR: return a ^ b;
388 case ATOMIC_OPERATION_EXCHANGE: return b;
// Comparand matches getOperationInitialValue: 0xBEFFFFFF18 for 64-bit
// texels, 18 for 32-bit — so only the first compare-swap per texel succeeds.
389 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
//! Image usage flags for the result image: always STORAGE; transfer src/dst
//! are added only when the test copies data with transfer commands.
396 VkImageUsageFlags getUsageFlags (bool useTransfer)
398 VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;
401 usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
//! Registers the helper compute shaders used when transfer is unavailable:
//! "fillShader" writes initial texel data from an SSBO into the image, and
//! "readShader" copies image texels back out to an SSBO. For image types
//! that can be sparse, a third "readShaderResidency" variant additionally
//! cross-checks imageLoad against sparseImageLoadARB.
406 void AddFillReadShader (SourceCollections& sourceCollections,
407 const ImageType& imageType,
408 const tcu::TextureFormat& format,
409 const string& componentType,
410 const string& vec4Type)
412 const string imageInCoord = getCoordStr(imageType, "gx", "gy", "gz");
413 const string shaderImageFormatStr = getShaderImageFormatQualifier(format);
414 const string shaderImageTypeStr = getShaderImageType(format, imageType);
// 64-bit texel formats need the explicit int64 arithmetic/image extensions.
415 const auto componentWidth = getFormatComponentWidth(mapTextureFormat(format), 0u);
416 const string extensions = ((componentWidth == 64u)
417 ? "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
418 "#extension GL_EXT_shader_image_int64 : require\n"
// Fill: one invocation per texel, buffer -> image via imageStore.
422 const string fillShader = "#version 450\n"
424 "precision highp " + shaderImageTypeStr + ";\n"
426 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
427 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
429 "layout(std430, binding = 1) buffer inputBuffer\n"
431 " "+ componentType + " data[];\n"
436 " int gx = int(gl_GlobalInvocationID.x);\n"
437 " int gy = int(gl_GlobalInvocationID.y);\n"
438 " int gz = int(gl_GlobalInvocationID.z);\n"
439 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
440 " imageStore(u_resultImage, " + imageInCoord + ", " + vec4Type + "(inBuffer.data[index]));\n"
// Read: one invocation per texel, image -> buffer via imageLoad.
443 const string readShader = "#version 450\n"
445 "precision highp " + shaderImageTypeStr + ";\n"
447 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
448 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
450 "layout(std430, binding = 1) buffer outputBuffer\n"
452 " " + componentType + " data[];\n"
457 " int gx = int(gl_GlobalInvocationID.x);\n"
458 " int gy = int(gl_GlobalInvocationID.y);\n"
459 " int gz = int(gl_GlobalInvocationID.z);\n"
460 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
461 " outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
// Sparse residency variant only for types that support sparse images
// (1D, 1D-array and buffer images cannot be sparse residency images).
465 if ((imageType != IMAGE_TYPE_1D) &&
466 (imageType != IMAGE_TYPE_1D_ARRAY) &&
467 (imageType != IMAGE_TYPE_BUFFER))
469 const string readShaderResidency = "#version 450\n"
470 "#extension GL_ARB_sparse_texture2 : require\n"
472 "precision highp " + shaderImageTypeStr + ";\n"
474 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
475 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
477 "layout(std430, binding = 1) buffer outputBuffer\n"
479 " " + componentType + " data[];\n"
484 " int gx = int(gl_GlobalInvocationID.x);\n"
485 " int gy = int(gl_GlobalInvocationID.y);\n"
486 " int gz = int(gl_GlobalInvocationID.z);\n"
487 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
488 " outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
489 " " + vec4Type + " sparseValue;\n"
490 " sparseImageLoadARB(u_resultImage, " + imageInCoord + ", sparseValue);\n"
// Write a sentinel (1234) if the sparse load disagrees with imageLoad.
491 " if (outBuffer.data[index] != sparseValue.x)\n"
492 " outBuffer.data[index] = " + vec4Type + "(1234).x;\n"
495 sourceCollections.glslSources.add("readShaderResidency") << glu::ComputeSource(readShaderResidency.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
498 sourceCollections.glslSources.add("fillShader") << glu::ComputeSource(fillShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
499 sourceCollections.glslSources.add("readShader") << glu::ComputeSource(readShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
502 //! Prepare the initial data for the image
// Fills the host-visible staging buffer with the per-operation initial
// value for every texel, then flushes so the device sees the data. 64-bit
// formats are written through raw pointers because PixelBufferAccess has no
// 64-bit setPixel path.
503 static void initDataForImage (const VkDevice device,
504 const DeviceInterface& deviceInterface,
505 const TextureFormat& format,
506 const AtomicOperation operation,
507 const tcu::UVec3& gridSize,
510 Allocation& bufferAllocation = buffer.getAllocation();
511 const VkFormat imageFormat = mapTextureFormat(format);
512 tcu::PixelBufferAccess pixelBuffer (format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());
514 if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
516 const deInt64 initialValue(getOperationInitialValue<deInt64>(operation));
518 for (deUint32 z = 0; z < gridSize.z(); z++)
519 for (deUint32 y = 0; y < gridSize.y(); y++)
520 for (deUint32 x = 0; x < gridSize.x(); x++)
522 *((deInt64*)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
527 const tcu::IVec4 initialValue(getOperationInitialValue<deInt32>(operation));
529 for (deUint32 z = 0; z < gridSize.z(); z++)
530 for (deUint32 y = 0; y < gridSize.y(); y++)
531 for (deUint32 x = 0; x < gridSize.x(); x++)
533 pixelBuffer.setPixel(initialValue, x, y, z);
// Make the CPU writes visible to the device.
537 flushAlloc(deviceInterface, device, bufferAllocation);
//! Shared checkSupport() for all atomic image test cases. Throws
//! NotSupportedError for missing optional features and TCU_FAIL for
//! features the spec mandates for the format. Covers: texel-buffer atomics,
//! cube arrays, sparse backing, float atomics (VK_EXT_shader_atomic_float /
//! _float2), 64-bit image atomics (VK_EXT_shader_image_atomic_int64),
//! transfer features, and sparse-residency shader reads.
540 void commonCheckSupport (Context& context, const tcu::TextureFormat& tcuFormat, ImageType imageType, AtomicOperation operation, bool useTransfer, ShaderReadType readType, ImageBackingType backingType)
542 const VkFormat format = mapTextureFormat(tcuFormat);
543 const VkImageType vkImgType = mapImageType(imageType);
544 const VkFormatFeatureFlags texelBufferSupport = (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);
545 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(context.getInstanceInterface(),
546 context.getPhysicalDevice(), format);
548 if ((imageType == IMAGE_TYPE_BUFFER) &&
549 ((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
550 TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");
552 if (imageType == IMAGE_TYPE_CUBE_ARRAY)
553 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY)
;
// Sparse backing needs sparse binding plus residency for the image type.
555 if (backingType == ImageBackingType::SPARSE)
557 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
561 case VK_IMAGE_TYPE_2D: context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D); break;
562 case VK_IMAGE_TYPE_3D: context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D); break;
563 default: DE_ASSERT(false); break;
566 if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format, vkImgType, VK_SAMPLE_COUNT_1_BIT, getUsageFlags(useTransfer), VK_IMAGE_TILING_OPTIMAL))
567 TCU_THROW(NotSupportedError, "Format does not support sparse images");
// Float formats: VK_EXT_shader_atomic_float (and _float2 for min/max).
570 if (isFloatFormat(format))
572 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
574 const VkFormatFeatureFlags requiredFeatures = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
575 const auto& atomicFloatFeatures = context.getShaderAtomicFloatFeaturesEXT();
577 if (!atomicFloatFeatures.shaderImageFloat32Atomics)
578 TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");
580 if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
581 TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");
583 if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
585 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
586 if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
588 TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
// Storage-image atomic features are mandatory once the extension is
// enabled, so a missing bit is a failure rather than "not supported".
592 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
593 TCU_FAIL("Required format feature bits not supported");
595 if (backingType == ImageBackingType::SPARSE)
597 if (!atomicFloatFeatures.sparseImageFloat32Atomics)
598 TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");
600 if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
601 TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
// 64-bit integer formats: VK_EXT_shader_image_atomic_int64.
605 else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
607 context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");
609 const VkFormatFeatureFlags requiredFeatures = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
610 const auto& atomicInt64Features = context.getShaderImageAtomicInt64FeaturesEXT();
612 if (!atomicInt64Features.shaderImageInt64Atomics)
613 TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");
615 if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
616 TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");
618 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
619 TCU_FAIL("Mandatory format features not supported");
624 const VkFormatFeatureFlags transferFeatures = (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
625 if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
626 TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
629 if (readType == ShaderReadType::SPARSE)
631 DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
632 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
636 class BinaryAtomicEndResultCase : public vkt::TestCase
// Test case that performs atomic operations on an image and verifies only
// the final texel values (end results), not the per-invocation returns.
639 BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
641 const string& description,
642 const ImageType imageType,
643 const tcu::UVec3& imageSize,
644 const tcu::TextureFormat& format,
645 const AtomicOperation operation,
646 const bool useTransfer,
647 const ShaderReadType shaderReadType,
648 const ImageBackingType backingType,
649 const glu::GLSLVersion glslVersion);
651 void initPrograms (SourceCollections& sourceCollections) const;
652 TestInstance* createInstance (Context& context) const;
653 virtual void checkSupport (Context& context) const;
// Case parameters captured at construction time.
656 const ImageType m_imageType;
657 const tcu::UVec3 m_imageSize;
658 const tcu::TextureFormat m_format;
659 const AtomicOperation m_operation;
660 const bool m_useTransfer;
661 const ShaderReadType m_readType;
662 const ImageBackingType m_backingType;
663 const glu::GLSLVersion m_glslVersion;
// Member-initializing constructor; forwards name/description to TestCase
// and stores all test parameters.
666 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
668 const string& description,
669 const ImageType imageType,
670 const tcu::UVec3& imageSize,
671 const tcu::TextureFormat& format,
672 const AtomicOperation operation,
673 const bool useTransfer,
674 const ShaderReadType shaderReadType,
675 const ImageBackingType backingType,
676 const glu::GLSLVersion glslVersion)
677 : TestCase (testCtx, name, description)
678 , m_imageType (imageType)
679 , m_imageSize (imageSize)
681 , m_operation (operation)
682 , m_useTransfer (useTransfer)
683 , m_readType (shaderReadType)
684 , m_backingType (backingType)
685 , m_glslVersion (glslVersion)
// Delegates all feature/format checks to the shared helper.
689 void BinaryAtomicEndResultCase::checkSupport (Context& context) const
691 commonCheckSupport(context, m_format, m_imageType, m_operation, m_useTransfer, m_readType, m_backingType);
// Builds the shaders for the end-result check: the shared fill/read helpers
// plus either a specialized SPIR-V template (for SUB/INC/DEC, which lack a
// GLSL built-in) or a generated GLSL compute shader that performs the
// atomic and discards its return value.
694 void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
696 const VkFormat imageFormat = mapTextureFormat(m_format);
697 const deUint32 componentWidth = getFormatComponentWidth(imageFormat, 0);
698 const bool intFormat = isIntFormat(imageFormat);
699 const bool uintFormat = isUintFormat(imageFormat);
700 const bool floatFormat = isFloatFormat(imageFormat);
701 const string type = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
702 const string vec4Type = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
704 AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
// SPIR-V path: specialize the assembly template with op name and the
// (possibly empty) last argument.
706 if (isSpirvAtomicOperation(m_operation))
708 const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
709 const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
710 std::map<std::string, std::string> specializations;
712 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
713 if (isSpirvAtomicNoLastArgOp(m_operation))
714 specializations["LASTARG"] = "";
716 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
// GLSL path: generate the compute shader text.
720 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
722 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
// NUM_INVOCATIONS_PER_PIXEL invocations share each texel along x, hence
// the "gx % width" coordinate wrap.
723 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
725 const string atomicArgExpr = type + getAtomicFuncArgumentShaderStr(m_operation,
727 IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
// compSwap comparand must equal the initial texel value
// (0xBEFFFFFF18 == 820338753304 for 64-bit, 18 for 32-bit).
729 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
730 (componentWidth == 64 ?", 820338753304": ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "")
732 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
733 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
734 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
735 const string extensions = "#extension GL_EXT_shader_atomic_float : enable\n"
736 "#extension GL_EXT_shader_atomic_float2 : enable\n"
737 "#extension GL_KHR_memory_scope_semantics : enable";
739 string source = versionDecl + "\n" + extensions + "\n";
741 if (64 == componentWidth)
743 source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
744 "#extension GL_EXT_shader_image_int64 : require\n";
747 source += "precision highp " + shaderImageTypeStr + ";\n"
749 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
750 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
754 " int gx = int(gl_GlobalInvocationID.x);\n"
755 " int gy = int(gl_GlobalInvocationID.y);\n"
756 " int gz = int(gl_GlobalInvocationID.z);\n"
757 " " + atomicInvocation + ";\n"
760 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
764 class BinaryAtomicIntermValuesCase : public vkt::TestCase
// Test case that stores each atomic call's *returned* (intermediate) value
// into a second image, so the per-invocation results can be verified too.
767 BinaryAtomicIntermValuesCase (tcu::TestContext& testCtx,
769 const string& description,
770 const ImageType imageType,
771 const tcu::UVec3& imageSize,
772 const tcu::TextureFormat& format,
773 const AtomicOperation operation,
774 const bool useTransfer,
775 const ShaderReadType shaderReadType,
776 const ImageBackingType backingType,
777 const glu::GLSLVersion glslVersion);
779 void initPrograms (SourceCollections& sourceCollections) const;
780 TestInstance* createInstance (Context& context) const;
781 virtual void checkSupport (Context& context) const;
// Case parameters captured at construction time (mirrors
// BinaryAtomicEndResultCase).
784 const ImageType m_imageType;
785 const tcu::UVec3 m_imageSize;
786 const tcu::TextureFormat m_format;
787 const AtomicOperation m_operation;
788 const bool m_useTransfer;
789 const ShaderReadType m_readType;
790 const ImageBackingType m_backingType;
791 const glu::GLSLVersion m_glslVersion;
// Member-initializing constructor; forwards name/description to TestCase
// and stores all test parameters.
794 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext& testCtx,
796 const string& description,
797 const ImageType imageType,
798 const tcu::UVec3& imageSize,
799 const TextureFormat& format,
800 const AtomicOperation operation,
801 const bool useTransfer,
802 const ShaderReadType shaderReadType,
803 const ImageBackingType backingType,
804 const glu::GLSLVersion glslVersion)
805 : TestCase (testCtx, name, description)
806 , m_imageType (imageType)
807 , m_imageSize (imageSize)
809 , m_operation (operation)
810 , m_useTransfer (useTransfer)
811 , m_readType (shaderReadType)
812 , m_backingType (backingType)
813 , m_glslVersion (glslVersion)
// Delegates all feature/format checks to the shared helper.
817 void BinaryAtomicIntermValuesCase::checkSupport (Context& context) const
819 commonCheckSupport(context, m_format, m_imageType, m_operation, m_useTransfer, m_readType, m_backingType);
// Builds the shaders for the intermediate-values check. Same structure as
// the end-result variant, but the generated GLSL shader imageStores the
// atomic call's return value into a second image (u_intermValuesImage) for
// later verification.
822 void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
824 const VkFormat imageFormat = mapTextureFormat(m_format);
825 const deUint32 componentWidth = getFormatComponentWidth(imageFormat, 0);
826 const bool intFormat = isIntFormat(imageFormat);
827 const bool uintFormat = isUintFormat(imageFormat);
828 const bool floatFormat = isFloatFormat(imageFormat);
829 const string type = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
830 const string vec4Type = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
832 AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
// SPIR-V path for SUB/INC/DEC (no GLSL built-in).
834 if (isSpirvAtomicOperation(m_operation))
836 const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
837 const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
838 std::map<std::string, std::string> specializations;
840 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
841 if (isSpirvAtomicNoLastArgOp(m_operation))
842 specializations["LASTARG"] = "";
844 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
// GLSL path: generate the compute shader text.
848 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
849 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
// Result image coordinate wraps along x (invocations share texels);
// intermediate-values image is indexed by the full invocation coordinate.
850 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
851 const string invocationCoord = getCoordStr(m_imageType, "gx", "gy", "gz");
852 const string atomicArgExpr = type + getAtomicFuncArgumentShaderStr(m_operation,
854 IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
// compSwap comparand must equal the initial texel value
// (0xBEFFFFFF18 == 820338753304 for 64-bit, 18 for 32-bit).
856 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
857 (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "") :
859 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) +
860 "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
861 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
862 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
863 const string extensions = "#extension GL_EXT_shader_atomic_float : enable\n"
864 "#extension GL_EXT_shader_atomic_float2 : enable\n"
865 "#extension GL_KHR_memory_scope_semantics : enable";
867 string source = versionDecl + "\n" + extensions + "\n"
870 if (64 == componentWidth)
872 source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
873 "#extension GL_EXT_shader_image_int64 : require\n";
876 source += "precision highp " + shaderImageTypeStr + "; \n"
877 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
878 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
879 "layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
883 " int gx = int(gl_GlobalInvocationID.x);\n"
884 " int gy = int(gl_GlobalInvocationID.y);\n"
885 " int gz = int(gl_GlobalInvocationID.z);\n"
886 " imageStore(u_intermValuesImage, " + invocationCoord + ", " + vec4Type + "(" + atomicInvocation + "));\n"
889 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
893 class BinaryAtomicInstanceBase : public vkt::TestInstance
// Common runtime logic for both atomic test variants: owns the images,
// buffers and descriptors, and drives iterate(); subclasses supply the
// variant-specific resource setup, post-compute commands and verification.
897 BinaryAtomicInstanceBase (Context& context,
899 const ImageType imageType,
900 const tcu::UVec3& imageSize,
901 const TextureFormat& format,
902 const AtomicOperation operation,
903 const bool useTransfer,
904 const ShaderReadType shaderReadType,
905 const ImageBackingType backingType);
907 tcu::TestStatus iterate (void);
// Subclass hooks: sizing, resource/descriptor setup, command recording
// around the atomic dispatch, and result verification.
909 virtual deUint32 getOutputBufferSize (void) const = 0;
911 virtual void prepareResources (const bool useTransfer) = 0;
912 virtual void prepareDescriptors (const bool isTexelBuffer) = 0;
914 virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const = 0;
915 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
916 const VkPipeline pipeline,
917 const VkPipelineLayout pipelineLayout,
918 const VkDescriptorSet descriptorSet,
919 const VkDeviceSize& range,
920 const bool useTransfer) = 0;
922 virtual bool verifyResult (Allocation& outputBufferAllocation,
923 const bool is64Bit) const = 0;
// Shared helpers used by both subclasses.
927 void shaderFillImage (const VkCommandBuffer cmdBuffer,
928 const VkBuffer& buffer,
929 const VkPipeline pipeline,
930 const VkPipelineLayout pipelineLayout,
931 const VkDescriptorSet descriptorSet,
932 const VkDeviceSize& range,
933 const tcu::UVec3& gridSize);
935 void createImageAndView (VkFormat imageFormat,
936 const tcu::UVec3& imageExent,
938 de::MovePtr<Image>& imagePtr,
939 Move<VkImageView>& imageViewPtr);
941 void createImageResources (const VkFormat& imageFormat,
942 const bool useTransfer);
// Test parameters.
945 const ImageType m_imageType;
946 const tcu::UVec3 m_imageSize;
947 const TextureFormat m_format;
948 const AtomicOperation m_operation;
949 const bool m_useTransfer;
950 const ShaderReadType m_readType;
951 const ImageBackingType m_backingType;
// Vulkan resources owned by the instance (RAII move-pointers/handles).
953 de::MovePtr<Buffer> m_inputBuffer;
954 de::MovePtr<Buffer> m_outputBuffer;
955 Move<VkBufferView> m_descResultBufferView;
956 Move<VkBufferView> m_descIntermResultsBufferView;
957 Move<VkDescriptorPool> m_descriptorPool;
958 Move<VkDescriptorSetLayout> m_descriptorSetLayout;
959 Move<VkDescriptorSet> m_descriptorSet;
961 Move<VkDescriptorSetLayout> m_descriptorSetLayoutNoTransfer;
962 Move<VkDescriptorPool> m_descriptorPoolNoTransfer;
964 de::MovePtr<Image> m_resultImage;
965 Move<VkImageView> m_resultImageView;
// Semaphores the submit must wait on (e.g. sparse binding completion).
967 std::vector<VkSemaphore> m_waitSemaphores;
// Member-initializing constructor; stores all test parameters for iterate().
970 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context& context,
972 const ImageType imageType,
973 const tcu::UVec3& imageSize,
974 const TextureFormat& format,
975 const AtomicOperation operation,
976 const bool useTransfer,
977 const ShaderReadType shaderReadType,
978 const ImageBackingType backingType)
979 : vkt::TestInstance (context)
981 , m_imageType (imageType)
982 , m_imageSize (imageSize)
984 , m_operation (operation)
985 , m_useTransfer (useTransfer)
986 , m_readType (shaderReadType)
987 , m_backingType (backingType)
// Builds all resources, records one command buffer that (1) initialises the
// result image, (2) runs the atomic compute pass and (3) moves the results
// into a host-visible buffer, then submits it and verifies the output.
991 tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
993 const VkDevice device = m_context.getDevice();
994 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
995 const VkQueue queue = m_context.getUniversalQueue();
996 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
997 Allocator& allocator = m_context.getDefaultAllocator();
998 const VkDeviceSize imageSizeInBytes = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
999 const VkDeviceSize outBuffSizeInBytes = getOutputBufferSize();
1000 const VkFormat imageFormat = mapTextureFormat(m_format);
1001 const bool isTexelBuffer = (m_imageType == IMAGE_TYPE_BUFFER);
1005 createImageResources(imageFormat, m_useTransfer);
1008 tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1010 //Prepare the buffer with the initial data for the image
1011 m_inputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface,
1014 makeBufferCreateInfo(imageSizeInBytes,
1015 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
1016 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1017 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
1018 MemoryRequirement::HostVisible));
1020 // Fill in buffer with initial data used for image.
1021 initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);
1023 // Create a buffer to store shader output copied from result image
1024 m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface,
1027 makeBufferCreateInfo(outBuffSizeInBytes,
1028 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1029 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1030 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
1031 MemoryRequirement::HostVisible));
1035 prepareResources(m_useTransfer);
1038 prepareDescriptors(isTexelBuffer);
// Extra compute pipelines used only on the no-transfer path: one pass fills
// the image from the input buffer, another reads it back afterwards.
1040 Move<VkDescriptorSet> descriptorSetFillImage;
1041 Move<VkShaderModule> shaderModuleFillImage;
1042 Move<VkPipelineLayout> pipelineLayoutFillImage;
1043 Move<VkPipeline> pipelineFillImage;
1045 Move<VkDescriptorSet> descriptorSetReadImage;
1046 Move<VkShaderModule> shaderModuleReadImage;
1047 Move<VkPipelineLayout> pipelineLayoutReadImage;
1048 Move<VkPipeline> pipelineReadImage;
1052 m_descriptorSetLayoutNoTransfer =
1053 DescriptorSetLayoutBuilder()
1054 .addSingleBinding((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), VK_SHADER_STAGE_COMPUTE_BIT)
1055 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1056 .build(deviceInterface, device);
// Pool sized for two sets: one for the fill pass, one for the read pass.
1058 m_descriptorPoolNoTransfer =
1059 DescriptorPoolBuilder()
1060 .addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), 2)
1061 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
1062 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);
1064 descriptorSetFillImage = makeDescriptorSet(deviceInterface,
1066 *m_descriptorPoolNoTransfer,
1067 *m_descriptorSetLayoutNoTransfer);
1069 descriptorSetReadImage = makeDescriptorSet(deviceInterface,
1071 *m_descriptorPoolNoTransfer,
1072 *m_descriptorSetLayoutNoTransfer);
1074 shaderModuleFillImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
1075 pipelineLayoutFillImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1076 pipelineFillImage = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);
// Sparse backing uses the residency-aware read shader variant.
1078 if (m_readType == ShaderReadType::SPARSE)
1080 shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShaderResidency"), 0);
1084 shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
1086 pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
// NOTE(review): the read pipeline is created with pipelineLayoutFillImage
// rather than pipelineLayoutReadImage. Both layouts are built from the same
// descriptor set layout, so they are compatible and behavior matches, but
// this looks unintentional - confirm intent.
1087 pipelineReadImage = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleReadImage);
// Pipeline performing the atomic operation under test (shader named m_name).
1091 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
1092 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
1093 const Unique<VkPipeline> pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
1095 // Create command buffer
1096 const Unique<VkCommandPool> cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
1097 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1099 beginCommandBuffer(deviceInterface, *cmdBuffer);
// Initialise the result image: via a buffer-to-image transfer copy when
// transfer usage is enabled, otherwise via the fill compute shader.
1105 const vector<VkBufferImageCopy> bufferImageCopy(1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize)));
1106 copyBufferToImage(deviceInterface,
1111 VK_IMAGE_ASPECT_COLOR_BIT,
1113 getNumLayers(m_imageType, m_imageSize), m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
1117 shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage, *descriptorSetFillImage, imageSizeInBytes, gridSize);
1119 commandsBeforeCompute(*cmdBuffer);
1122 deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1123 deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
// NUM_INVOCATIONS_PER_PIXEL invocations per pixel, spread along X.
1125 deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1127 commandsAfterCompute(*cmdBuffer,
1129 *pipelineLayoutReadImage,
1130 *descriptorSetReadImage,
// Make the output-buffer writes (transfer or shader, depending on path)
// visible to host reads.
1134 const VkBufferMemoryBarrier outputBufferPreHostReadBarrier
1135 = makeBufferMemoryBarrier(((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
1136 VK_ACCESS_HOST_READ_BIT,
1137 m_outputBuffer->get(),
1139 outBuffSizeInBytes);
1141 deviceInterface.cmdPipelineBarrier(*cmdBuffer,
1142 ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
1143 VK_PIPELINE_STAGE_HOST_BIT,
1144 DE_FALSE, 0u, DE_NULL,
1145 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
1147 endCommandBuffer(deviceInterface, *cmdBuffer);
// Wait on any sparse-binding semaphores collected in createImageAndView().
1149 std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
1150 submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
1151 static_cast<deUint32>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores), de::dataOrNull(waitStages));
1153 Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();
1155 invalidateAlloc(deviceInterface, device, outputBufferAllocation);
// R64 formats need 64-bit texel handling in verifyResult().
1157 if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
1158 return tcu::TestStatus::pass("Comparison succeeded");
1160 return tcu::TestStatus::fail("Comparison failed");
// Records a compute pass that initialises m_resultImage from 'buffer':
// transitions the image UNDEFINED -> GENERAL, dispatches one invocation per
// texel, then makes the writes visible to the following atomic compute pass.
1163 void BinaryAtomicInstanceBase::shaderFillImage (const VkCommandBuffer cmdBuffer,
1164 const VkBuffer& buffer,
1165 const VkPipeline pipeline,
1166 const VkPipelineLayout pipelineLayout,
1167 const VkDescriptorSet descriptorSet,
1168 const VkDeviceSize& range,
1169 const tcu::UVec3& gridSize)
1171 const VkDevice device = m_context.getDevice();
1172 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1173 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1174 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(buffer, 0, range);
1175 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
// Binding 0: destination image, binding 1: source data buffer.
1177 DescriptorSetUpdateBuilder()
1178 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1179 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1180 .update(deviceInterface, device)
// Layout transition before the first write; prior contents are discarded.
1182 const VkImageMemoryBarrier imageBarrierPre = makeImageMemoryBarrier(0,
1183 VK_ACCESS_SHADER_WRITE_BIT,
1184 VK_IMAGE_LAYOUT_UNDEFINED,
1185 VK_IMAGE_LAYOUT_GENERAL,
1186 m_resultImage->get(),
1189 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1190 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1191 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1192 (VkDependencyFlags)0,
1193 0, (const VkMemoryBarrier*)DE_NULL,
1194 0, (const VkBufferMemoryBarrier*)DE_NULL,
1195 1, &imageBarrierPre);
1197 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1198 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
// One invocation per texel of the shader grid.
1200 deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());
// Make the fill writes visible to the atomic pass (compute -> compute).
1202 const VkImageMemoryBarrier imageBarrierPost = makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
1203 VK_ACCESS_SHADER_READ_BIT,
1204 VK_IMAGE_LAYOUT_GENERAL,
1205 VK_IMAGE_LAYOUT_GENERAL,
1206 m_resultImage->get(),
1209 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1210 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1211 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1212 (VkDependencyFlags)0,
1213 0, (const VkMemoryBarrier*)DE_NULL,
1214 0, (const VkBufferMemoryBarrier*)DE_NULL,
1215 1, &imageBarrierPost);
// Creates the image (cube-compatible flag when needed, sparse binding and
// residency flags for sparse backing) and a full-range colour view of it.
// For sparse images the binding semaphore is queued in m_waitSemaphores so
// iterate() waits on it at submit time.
1218 void BinaryAtomicInstanceBase::createImageAndView (VkFormat imageFormat,
1219 const tcu::UVec3& imageExent,
1221 de::MovePtr<Image>& imagePtr,
1222 Move<VkImageView>& imageViewPtr)
1224 const VkDevice device = m_context.getDevice();
1225 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1226 Allocator& allocator = m_context.getDefaultAllocator();
1227 const VkImageUsageFlags usageFlags = getUsageFlags(useTransfer);
1228 VkImageCreateFlags createFlags = 0u;
1230 if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
1231 createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
1233 const auto numLayers = getNumLayers(m_imageType, m_imageSize);
1235 VkImageCreateInfo createInfo =
1237 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1238 DE_NULL, // const void* pNext;
1239 createFlags, // VkImageCreateFlags flags;
1240 mapImageType(m_imageType), // VkImageType imageType;
1241 imageFormat, // VkFormat format;
1242 makeExtent3D(imageExent), // VkExtent3D extent;
1243 1u, // deUint32 mipLevels;
1244 numLayers, // deUint32 arrayLayers;
1245 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
1246 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
1247 usageFlags, // VkImageUsageFlags usage;
1248 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1249 0u, // deUint32 queueFamilyIndexCount;
1250 DE_NULL, // const deUint32* pQueueFamilyIndices;
1251 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
1254 if (m_backingType == ImageBackingType::SPARSE)
1256 const auto& vki = m_context.getInstanceInterface();
1257 const auto physicalDevice = m_context.getPhysicalDevice();
1258 const auto sparseQueue = m_context.getSparseQueue();
1259 const auto sparseQueueIdx = m_context.getSparseQueueFamilyIndex();
1260 const auto universalQIdx = m_context.getUniversalQueueFamilyIndex();
1261 const deUint32 queueIndices[] = { universalQIdx, sparseQueueIdx };
1263 createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
// Distinct sparse and universal queue families require concurrent sharing
// so both can access the image without ownership transfers.
1265 if (sparseQueueIdx != universalQIdx)
1267 createInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
1268 createInfo.queueFamilyIndexCount = static_cast<deUint32>(DE_LENGTH_OF_ARRAY(queueIndices));
1269 createInfo.pQueueFamilyIndices = queueIndices;
// SparseImage binds its own backing memory and signals a semaphore once the
// binding operation has been submitted.
1272 const auto sparseImage = new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
1273 m_waitSemaphores.push_back(sparseImage->getSemaphore());
1274 imagePtr = de::MovePtr<Image>(sparseImage);
1277 imagePtr = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));
1279 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);
1281 imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat, subresourceRange);
1284 void BinaryAtomicInstanceBase::createImageResources (const VkFormat& imageFormat,
1285 const bool useTransfer)
1287 //Create the image that is going to store results of atomic operations
1288 createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage, m_resultImageView);
// Verifies the *final* texel values after all atomic invocations completed:
// for order-independent operations the exact expected value is recomputed on
// the host; for (compare-)exchange the result must match one invocation's arg.
1291 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1295 BinaryAtomicEndResultInstance (Context& context,
1297 const ImageType imageType,
1298 const tcu::UVec3& imageSize,
1299 const TextureFormat& format,
1300 const AtomicOperation operation,
1301 const bool useTransfer,
1302 const ShaderReadType shaderReadType,
1303 const ImageBackingType backingType)
1304 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation, useTransfer, shaderReadType, backingType) {}
1306 virtual deUint32 getOutputBufferSize (void) const;
// No extra resources needed: atomics write straight into m_resultImage.
1308 virtual void prepareResources (const bool useTransfer) { DE_UNREF(useTransfer); }
1309 virtual void prepareDescriptors (const bool isTexelBuffer);
1311 virtual void commandsBeforeCompute (const VkCommandBuffer) const {}
1312 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1313 const VkPipeline pipeline,
1314 const VkPipelineLayout pipelineLayout,
1315 const VkDescriptorSet descriptorSet,
1316 const VkDeviceSize& range,
1317 const bool useTransfer);
1319 virtual bool verifyResult (Allocation& outputBufferAllocation,
1320 const bool is64Bit) const;
// Recomputes the expected end value for pixel (x,y,z) and compares it with
// the shader-produced resultValue; T selects the texel component type.
1324 template <typename T>
1325 bool isValueCorrect (const T resultValue,
1329 const UVec3& gridSize,
1330 const IVec3 extendedGridSize) const;
1333 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
1335 return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
// Single binding for the atomic pass: either a texel-buffer view over the
// input buffer (buffer-image case, atomics act directly on it) or the result
// image in GENERAL layout.
1338 void BinaryAtomicEndResultInstance::prepareDescriptors (const bool isTexelBuffer)
1340 const VkDescriptorType descriptorType = isTexelBuffer ?
1341 VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1342 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1343 const VkDevice device = m_context.getDevice();
1344 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1346 m_descriptorSetLayout =
1347 DescriptorSetLayoutBuilder()
1348 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1349 .build(deviceInterface, device);
1352 DescriptorPoolBuilder()
1353 .addType(descriptorType)
1354 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1356 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
// Texel-buffer path: the atomics operate on the input buffer itself.
1360 m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1362 DescriptorSetUpdateBuilder()
1363 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1364 .update(deviceInterface, device);
// Image path: bind the result image as a storage image.
1368 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1370 DescriptorSetUpdateBuilder()
1371 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1372 .update(deviceInterface, device);
// Moves the atomic results into m_outputBuffer for host verification. Three
// paths: buffer images alias the input buffer directly; useTransfer copies
// the image to the buffer; otherwise a read-back compute pass writes the
// texels into the buffer via the 'readShader' pipeline passed in.
1376 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1377 const VkPipeline pipeline,
1378 const VkPipelineLayout pipelineLayout,
1379 const VkDescriptorSet descriptorSet,
1380 const VkDeviceSize& range,
1381 const bool useTransfer)
1383 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1384 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1385 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
// Buffer image: atomics already wrote into the input buffer, so just read it.
1387 if (m_imageType == IMAGE_TYPE_BUFFER)
1389 m_outputBuffer = m_inputBuffer;
1391 else if (useTransfer)
// Transfer path: transition to TRANSFER_SRC and copy image -> output buffer.
1393 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1394 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1395 VK_ACCESS_TRANSFER_READ_BIT,
1396 VK_IMAGE_LAYOUT_GENERAL,
1397 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1398 m_resultImage->get(),
1401 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1402 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1403 VK_PIPELINE_STAGE_TRANSFER_BIT,
1404 DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1405 1u, &resultImagePostDispatchBarrier);
1407 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));
1409 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
// No-transfer path: read the image back with a compute shader instead.
1413 const VkDevice device = m_context.getDevice();
1414 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1415 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1417 DescriptorSetUpdateBuilder()
1418 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1419 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1420 .update(deviceInterface, device);
// Make the atomic writes visible to the read-back pass (compute -> compute).
1422 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1423 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1424 VK_ACCESS_SHADER_READ_BIT,
1425 VK_IMAGE_LAYOUT_GENERAL,
1426 VK_IMAGE_LAYOUT_GENERAL,
1427 m_resultImage->get(),
1430 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1431 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1432 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1433 DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1434 1u, &resultImagePostDispatchBarrier);
1436 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1437 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
// Dispatch one invocation per texel; layered images put the layer count in
// the dimension the image type uses for layers.
1439 switch (m_imageType)
1441 case IMAGE_TYPE_1D_ARRAY:
1442 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
1444 case IMAGE_TYPE_2D_ARRAY:
1445 case IMAGE_TYPE_CUBE:
1446 case IMAGE_TYPE_CUBE_ARRAY:
1447 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1450 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
// Walks every pixel of the readback buffer. Order-independent operations are
// checked against an exact host-computed value (isValueCorrect); exchange and
// compare-exchange only require the end result to equal one of the per-
// invocation arguments, since the winning invocation is unspecified.
1456 bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation,
1457 const bool is64Bit) const
1459 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1460 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1462 tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
1464 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1465 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1466 for (deInt32 x = 0; x < resultBuffer.getWidth(); x++)
1468 const void* resultValue = resultBuffer.getPixelPtr(x, y, z);
1469 deInt32 floatToIntValue = 0;
1470 bool isFloatValue = false;
// Float texels are converted to int for comparison with the int reference.
1471 if (isFloatFormat(mapTextureFormat(m_format)))
1473 isFloatValue = true;
1474 floatToIntValue = static_cast<deInt32>(*((float*)resultValue));
1477 if (isOrderIndependentAtomicOperation(m_operation))
// Pick the comparison type from the format's signedness and bit width.
1479 if (isUintFormat(mapTextureFormat(m_format)))
1483 if (!isValueCorrect<deUint64>(*((deUint64*)resultValue), x, y, z, gridSize, extendedGridSize))
1488 if (!isValueCorrect<deUint32>(*((deUint32*)resultValue), x, y, z, gridSize, extendedGridSize))
1492 else if (isIntFormat(mapTextureFormat(m_format)))
1496 if (!isValueCorrect<deInt64>(*((deInt64*)resultValue), x, y, z, gridSize, extendedGridSize))
1501 if (!isValueCorrect<deInt32>(*((deInt32*)resultValue), x, y, z, gridSize, extendedGridSize))
1507 // 32-bit floating point
1508 if (!isValueCorrect<deInt32>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
1512 else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
1514 // Check if the end result equals one of the atomic args.
1515 bool matchFound = false;
1517 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1519 const IVec3 gid(x + i*gridSize.x(), y, z);
1520 matchFound = is64Bit ?
1521 (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1523 floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
1524 (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1531 else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
1533 // Check if the end result equals one of the atomic args.
1534 bool matchFound = false;
1536 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1538 const IVec3 gid(x + i*gridSize.x(), y, z);
1539 matchFound = is64Bit ?
1540 (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1542 floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
1543 (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1555 template <typename T>
1556 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1558 T reference = getOperationInitialValue<T>(m_operation);
1559 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1561 const IVec3 gid(x + i*gridSize.x(), y, z);
1562 T arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1563 reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1565 return (resultValue == reference);
// Factory method: forwards every case parameter verbatim to the instance.
1568 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
1570 return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation, m_useTransfer, m_readType, m_backingType);
// Verifies the *intermediate* values each atomic invocation observed (the
// value returned by the atomic op), stored one texel per invocation in an
// extra image. Verification searches for an ordering of the invocations that
// explains all recorded values.
1573 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
1577 BinaryAtomicIntermValuesInstance (Context& context,
1579 const ImageType imageType,
1580 const tcu::UVec3& imageSize,
1581 const TextureFormat& format,
1582 const AtomicOperation operation,
1583 const bool useTransfer,
1584 const ShaderReadType shaderReadType,
1585 const ImageBackingType backingType)
1586 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation, useTransfer, shaderReadType, backingType) {}
1588 virtual deUint32 getOutputBufferSize (void) const;
1590 virtual void prepareResources (const bool useTransfer);
1591 virtual void prepareDescriptors (const bool isTexelBuffer);
1593 virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const;
1594 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1595 const VkPipeline pipeline,
1596 const VkPipelineLayout pipelineLayout,
1597 const VkDescriptorSet descriptorSet,
1598 const VkDeviceSize& range,
1599 const bool useTransfer);
1601 virtual bool verifyResult (Allocation& outputBufferAllocation,
1602 const bool is64Bit) const;
// Checks the NUM_INVOCATIONS_PER_PIXEL recorded values for one pixel.
1606 template <typename T>
1607 bool areValuesCorrect (tcu::ConstPixelBufferAccess& resultBuffer,
1608 const bool isFloatingPoint,
1612 const UVec3& gridSize,
1613 const IVec3 extendedGridSize) const;
// Backtracking search over invocation orderings that reproduce resultValues.
1615 template <typename T>
1616 bool verifyRecursive (const deInt32 index,
1618 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1619 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1620 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
// Extra image holding one intermediate value per invocation.
1621 de::MovePtr<Image> m_intermResultsImage;
1622 Move<VkImageView> m_intermResultsImageView;
1625 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
1627 return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1630 void BinaryAtomicIntermValuesInstance::prepareResources (const bool useTransfer)
1632 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1633 const bool isCubeBasedImage = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
1634 const UVec3 extendedLayerSize = isCubeBasedImage ? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
1635 : UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1637 createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage, m_intermResultsImageView);
// Two bindings for the atomic pass: binding 0 is the image the atomics act
// on, binding 1 receives each invocation's intermediate (returned) value.
// For buffer images both bindings are texel-buffer views over the staging
// buffers instead.
1640 void BinaryAtomicIntermValuesInstance::prepareDescriptors (const bool isTexelBuffer)
1642 const VkDescriptorType descriptorType = isTexelBuffer ?
1643 VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1644 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1646 const VkDevice device = m_context.getDevice();
1647 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1649 m_descriptorSetLayout =
1650 DescriptorSetLayoutBuilder()
1651 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1652 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1653 .build(deviceInterface, device);
1656 DescriptorPoolBuilder()
1657 .addType(descriptorType, 2u)
1658 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1660 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
// Texel-buffer path: atomics on the input buffer, intermediates into output.
1664 m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1665 m_descIntermResultsBufferView = makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1667 DescriptorSetUpdateBuilder()
1668 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1669 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &(m_descIntermResultsBufferView.get()))
1670 .update(deviceInterface, device);
// Image path: result image plus the extended intermediate-results image.
1674 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1675 const VkDescriptorImageInfo descIntermResultsImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1677 DescriptorSetUpdateBuilder()
1678 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1679 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &descIntermResultsImageInfo)
1680 .update(deviceInterface, device);
// Transitions the intermediate-results image UNDEFINED -> GENERAL before the
// atomic pass writes into it; previous contents are discarded.
1684 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
1686 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1687 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1689 const VkImageMemoryBarrier imagePreDispatchBarrier =
1690 makeImageMemoryBarrier( 0u,
1691 VK_ACCESS_SHADER_WRITE_BIT,
1692 VK_IMAGE_LAYOUT_UNDEFINED,
1693 VK_IMAGE_LAYOUT_GENERAL,
1694 m_intermResultsImage->get(),
1697 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
// Moves the intermediate-results image into m_outputBuffer: via a transfer
// copy when useTransfer is set, otherwise via a read-back compute pass. For
// buffer images the shader already wrote the intermediates into the output
// texel buffer, so nothing is recorded.
1700 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1701 const VkPipeline pipeline,
1702 const VkPipelineLayout pipelineLayout,
1703 const VkDescriptorSet descriptorSet,
1704 const VkDeviceSize& range,
1705 const bool useTransfer)
1707 // nothing is needed for texel image buffer
1708 if (m_imageType == IMAGE_TYPE_BUFFER)
1711 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1712 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1713 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
// Transfer path: transition to TRANSFER_SRC and copy image -> output buffer.
1717 const VkImageMemoryBarrier imagePostDispatchBarrier =
1718 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1719 VK_ACCESS_TRANSFER_READ_BIT,
1720 VK_IMAGE_LAYOUT_GENERAL,
1721 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1722 m_intermResultsImage->get(),
1725 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
// The copy covers the widened (per-invocation) extent.
1727 const UVec3 extendedLayerSize = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1728 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
1730 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
// No-transfer path: read the intermediate image back with a compute shader.
1734 const VkDevice device = m_context.getDevice();
1735 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1736 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1738 DescriptorSetUpdateBuilder()
1739 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1740 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1741 .update(deviceInterface, device);
// Make the atomic-pass writes visible to the read-back pass.
1743 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1744 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1745 VK_ACCESS_SHADER_READ_BIT,
1746 VK_IMAGE_LAYOUT_GENERAL,
1747 VK_IMAGE_LAYOUT_GENERAL,
1748 m_intermResultsImage->get(),
1751 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1752 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1753 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1754 DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1755 1u, &resultImagePostDispatchBarrier);
1757 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1758 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
// One invocation per texel of the widened image; layered types put the layer
// count in the dimension the image type uses for layers.
1760 switch (m_imageType)
1762 case IMAGE_TYPE_1D_ARRAY:
1763 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), subresourceRange.layerCount, layerSize.z());
1765 case IMAGE_TYPE_2D_ARRAY:
1766 case IMAGE_TYPE_CUBE:
1767 case IMAGE_TYPE_CUBE_ARRAY:
1768 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1771 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
// Checks the per-invocation atomic return values read back from the intermediate-results
// image. For every texel of the logical grid it validates that the NUM_INVOCATIONS_PER_PIXEL
// recorded return values can be ordered into a legal atomic-operation sequence.
// \param outputBufferAllocation host-visible buffer holding the read-back image data
// \param is64Bit selects 64-bit vs 32-bit result interpretation
// \return true when every texel's values form a valid sequence
1777 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation& outputBufferAllocation,
1778 const bool is64Bit) const
1780 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
// The result image is NUM_INVOCATIONS_PER_PIXEL times wider than the grid: invocation i
// for texel x stored its return value at x + i*gridSize.x().
1781 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1783 tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
// Iterate the logical grid only in X (gridSize.x(), not the extended width); the
// per-invocation columns are gathered inside areValuesCorrect().
1785 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1786 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1787 for (deUint32 x = 0; x < gridSize.x(); x++)
// Pick the element type from the texture format; within each branch is64Bit presumably
// selects the 64- vs 32-bit instantiation.
// NOTE(review): the `if (is64Bit)`/`else` and early `return false;` lines are elided
// from this excerpt — confirm the branch structure against the full file.
1789 if (isUintFormat(mapTextureFormat(m_format)))
1793 if (!areValuesCorrect<deUint64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1798 if (!areValuesCorrect<deUint32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1802 else if (isIntFormat(mapTextureFormat(m_format)))
1806 if (!areValuesCorrect<deInt64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1811 if (!areValuesCorrect<deInt32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
// Float results are stored in 32-bit texels; read them through deInt32 and let
// areValuesCorrect() reinterpret the bits (isFloatingPoint == true).
1817 // 32-bit floating point
1818 if (!areValuesCorrect<deInt32>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
// Gathers the NUM_INVOCATIONS_PER_PIXEL atomic return values recorded for texel (x,y,z),
// recomputes the argument each invocation passed to the atomic op, and checks (via
// verifyRecursive) that some ordering of the invocations reproduces exactly these
// return values starting from the operation's initial value.
// \param isFloatingPoint when true, the raw T bits are reinterpreted as float first
1826 template <typename T>
1827 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess& resultBuffer, const bool isFloatingPoint, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
// resultValues[i]: value invocation i saw returned by the atomic op.
// atomicArgs[i]:   argument invocation i passed to the atomic op.
// argsUsed[i]:     scratch flags for the backtracking search in verifyRecursive().
1829 T resultValues[NUM_INVOCATIONS_PER_PIXEL];
1830 T atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1831 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1833 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
// Invocation i's slot lives i grid-widths to the right of the texel's x coordinate.
1835 IVec3 gid(x + i*gridSize.x(), y, z);
1836 T data = *((T*)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
// Float data was stored bit-wise in an integer texel: memcpy the bits into a float,
// then convert back so comparisons below use the float's numeric value.
// NOTE(review): the declaration of fData (presumably `float fData;`) is elided from
// this excerpt — confirm against the full file.
1837 if (isFloatingPoint)
1840 deMemcpy(&fData, &data, sizeof(fData));
1841 data = static_cast<T>(fData);
1843 resultValues[i] = data;
// Reconstruct deterministically what argument this invocation used, from its gid.
1844 atomicArgs[i] = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1845 argsUsed[i] = false;
1848 // Verify that the return values form a valid sequence.
1849 return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
// Backtracking search: tries to find an ordering of the not-yet-used invocations such
// that each one's recorded return value equals the accumulated value at that point in
// the sequence. `valueSoFar` is the image value before the invocation at `index` runs.
// Returns true as soon as a complete valid ordering of all NUM_INVOCATIONS_PER_PIXEL
// invocations is found.
1852 template <typename T>
1853 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32 index,
1855 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1856 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1857 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
// Base case: every invocation has been placed in the sequence — success.
1859 if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
// Try each unused invocation as the next one in the sequence; it is a candidate only
// if the value it reports seeing matches the current accumulated value.
1862 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1864 if (!argsUsed[i] && resultValues[i] == valueSoFar)
// Recurse with invocation i applied: the new accumulated value is the binary atomic
// op folded over valueSoFar and i's argument.
// NOTE(review): the matching `argsUsed[i] = true;` before this call and the
// `return true;`/`return false;` lines are elided from this excerpt — confirm the
// mark/unmark pairing against the full file.
1868 if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
// Backtrack: free invocation i so other orderings can use it.
1873 argsUsed[i] = false;
// Factory hook called by the framework: instantiates the intermediate-values test
// instance, forwarding every case parameter (image type/size, format, atomic op,
// transfer/read/backing modes) unchanged. Caller takes ownership of the returned object.
1880 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
1882 return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation, m_useTransfer, m_readType, m_backingType);
// Builds the "atomic_operations" test group. Hierarchy:
//   operation / image type / [no]transfer / read type / backing type / format cases,
// with two cases per leaf: one checking the image end result, one checking the
// per-invocation intermediate return values.
1887 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
1889 de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
// Small local aggregate pairing an image type with the extent used for its cases.
1893 ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
1894 : m_imageType (imageType)
1895 , m_imageSize (imageSize)
1898 const ImageType m_imageType;
1899 const tcu::UVec3 m_imageSize;
// One entry per tested image type; the z component doubles as layer count for
// arrayed types.
1902 const ImageParams imageParamsArray[] =
1904 ImageParams(IMAGE_TYPE_1D, tcu::UVec3(64u, 1u, 1u)),
1905 ImageParams(IMAGE_TYPE_1D_ARRAY, tcu::UVec3(64u, 1u, 8u)),
1906 ImageParams(IMAGE_TYPE_2D, tcu::UVec3(64u, 64u, 1u)),
1907 ImageParams(IMAGE_TYPE_2D_ARRAY, tcu::UVec3(64u, 64u, 8u)),
1908 ImageParams(IMAGE_TYPE_3D, tcu::UVec3(48u, 48u, 8u)),
1909 ImageParams(IMAGE_TYPE_CUBE, tcu::UVec3(64u, 64u, 1u)),
1910 ImageParams(IMAGE_TYPE_CUBE_ARRAY, tcu::UVec3(64u, 64u, 2u)),
1911 ImageParams(IMAGE_TYPE_BUFFER, tcu::UVec3(64u, 1u, 1u))
// Single-channel formats covering 32/64-bit signed/unsigned integers and 32-bit float.
1914 const tcu::TextureFormat formats[] =
1916 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1917 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1918 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1919 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1920 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)
// How the shader reads results back: plain imageLoad vs sparse residency read.
1925 ShaderReadType type;
1929 { ShaderReadType::NORMAL, "normal_read" },
1930 { ShaderReadType::SPARSE, "sparse_read" },
// How the test image itself is backed: regular vs sparse-resident allocation.
1935 ImageBackingType type;
1939 { ImageBackingType::NORMAL, "normal_img" },
1940 { ImageBackingType::SPARSE, "sparse_img" },
// Outermost axis: one sub-group per atomic operation (add, min, max, ..., cmpxchg).
1943 for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
1945 const AtomicOperation operation = (AtomicOperation)operationI;
1947 de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
1949 for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
1951 const ImageType imageType = imageParamsArray[imageTypeNdx].m_imageType;
1952 const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
1954 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
// useTransfer toggles reading results back via transfer commands ("transfer")
// versus via a shader read-back pass ("notransfer").
1956 for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
1958 const bool useTransfer = (useTransferIdx > 0);
1959 const string groupName = (!useTransfer ? "no" : "") + string("transfer");
1961 de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str(), ""));
1963 for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
1965 const auto& readType = readTypes[readTypeIdx];
1967 de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name, ""));
1969 for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
1971 const auto& backingType = backingTypes[backingTypeIdx];
1973 de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(new tcu::TestCaseGroup(testCtx, backingType.name, ""));
1975 for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
1977 const TextureFormat& format = formats[formatNdx];
1978 const std::string formatName = getShaderImageFormatQualifier(format);
// Filters below prune combinations with no shader/feature support.
// NOTE(review): the `continue;` statements after each filter are elided from this
// excerpt — confirm each filter actually skips the iteration in the full file.
// Buffer images only have hand-written SPIR-V for the float format.
1980 // Need SPIRV programs in vktImageAtomicSpirvShaders.cpp
1981 if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
1986 // Only 2D and 3D images may support sparse residency.
1987 const auto vkImageType = mapImageType(imageType);
1988 if (backingType.type == ImageBackingType::SPARSE && (vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D))
// Float images: restrict to the operations float atomics define (add, exchange,
// min, max).
1991 // Only some operations are supported on floating-point
1992 if (format.type == tcu::TextureFormat::FLOAT)
1994 if (operation != ATOMIC_OPERATION_ADD &&
1995 operation != ATOMIC_OPERATION_EXCHANGE &&
1996 operation != ATOMIC_OPERATION_MIN &&
1997 operation != ATOMIC_OPERATION_MAX)
2003 if (readType.type == ShaderReadType::SPARSE)
2005 // When using transfer, shader reads will not be used, so avoid creating two identical cases.
2009 // Sparse reads are not supported for all types of images.
2010 if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY || imageType == IMAGE_TYPE_BUFFER)
2014 //!< Atomic case checks the end result of the operations, and not the intermediate return values
2015 const string caseEndResult = formatName + "_end_result";
2016 backingTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2018 //!< Atomic case checks the return values of the atomic function and not the end result.
2019 const string caseIntermValues = formatName + "_intermediate_values";
2020 backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
// Release each MovePtr into its parent so ownership transfers up the tree.
2023 readTypeGroup->addChild(backingTypeGroup.release());
2026 transferGroup->addChild(readTypeGroup.release());
2029 imageTypeGroup->addChild(transferGroup.release());
2032 operationGroup->addChild(imageTypeGroup.release());
2035 imageAtomicOperationsTests->addChild(operationGroup.release());
// Caller (the test package hierarchy) takes ownership of the root group.
2038 return imageAtomicOperationsTests.release();