1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2016 The Khronos Group Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 * \file vktImageAtomicOperationTests.cpp
21 * \brief Image atomic operation tests
22 *//*--------------------------------------------------------------------*/
24 #include "vktImageAtomicOperationTests.hpp"
25 #include "vktImageAtomicSpirvShaders.hpp"
27 #include "deUniquePtr.hpp"
28 #include "deStringUtil.hpp"
29 #include "deSTLUtil.hpp"
31 #include "vktTestCaseUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkQueryUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vktImageTestsUtil.hpp"
37 #include "vkBuilderUtil.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkTypeUtil.hpp"
41 #include "vkCmdUtil.hpp"
42 #include "vkObjUtil.hpp"
44 #include "tcuTextureUtil.hpp"
45 #include "tcuTexture.hpp"
46 #include "tcuVectorType.hpp"
47 #include "tcuStringTemplate.hpp"
60 using tcu::TextureFormat;
71 using tcu::Texture2DArray;
72 using tcu::TextureCube;
73 using tcu::PixelBufferAccess;
74 using tcu::ConstPixelBufferAccess;
76 using tcu::TestContext;
80 NUM_INVOCATIONS_PER_PIXEL = 5u
85 ATOMIC_OPERATION_ADD = 0,
94 ATOMIC_OPERATION_EXCHANGE,
95 ATOMIC_OPERATION_COMPARE_EXCHANGE,
100 enum class ShaderReadType
106 enum class ImageBackingType
//! Builds the GLSL texel-coordinate expression for the given image type:
//! 1-component for buffer, ivec2 for 1D-array (x = coord, y = layer),
//! ivec3 for 2D-array/cube/cube-array (z = layer/face).
//! NOTE(review): the switch header, the 1D/2D/3D cases and the default branch
//! are not visible in this extract — verify against the full file.
112 static string getCoordStr (const ImageType imageType,
113 const std::string& x,
114 const std::string& y,
115 const std::string& z)
120 case IMAGE_TYPE_BUFFER:
122 case IMAGE_TYPE_1D_ARRAY:
124 return string("ivec2(" + x + "," + y + ")");
125 case IMAGE_TYPE_2D_ARRAY:
127 case IMAGE_TYPE_CUBE:
128 case IMAGE_TYPE_CUBE_ARRAY:
129 return string("ivec3(" + x + "," + y + "," + z + ")");
//! Returns the GLSL scalar type name matching the format class and component
//! width: int/int64_t, uint/uint64_t or float/double. Exactly one of the
//! three format flags must be set (asserted below).
136 static string getComponentTypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
138 DE_ASSERT(intFormat || uintFormat || floatFormat);
140 const bool is64 = (componentWidth == 64);
// Branch conditions (if intFormat / uintFormat / else) are elided in this extract.
143 return (is64 ? "int64_t" : "int");
145 return (is64 ? "uint64_t" : "uint");
147 return (is64 ? "double" : "float");
//! Returns the GLSL 4-component vector type name matching the format class
//! and component width: ivec4/i64vec4, uvec4/u64vec4 or vec4/f64vec4.
152 static string getVec4TypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
154 DE_ASSERT(intFormat || uintFormat || floatFormat);
156 const bool is64 = (componentWidth == 64);
// Branch conditions (if intFormat / uintFormat / else) are elided in this extract.
159 return (is64 ? "i64vec4" : "ivec4");
161 return (is64 ? "u64vec4" : "uvec4");
163 return (is64 ? "f64vec4" : "vec4");
//! Builds the GLSL expression used as the data argument of the atomic call,
//! as a function of the invocation coordinates x/y/z. Must stay in sync with
//! the host-side reference computation in getAtomicFuncArgument().
168 static string getAtomicFuncArgumentShaderStr (const AtomicOperation op,
172 const IVec3& gridSize)
// Commutative ops use a simple positive polynomial of the coordinates.
176 case ATOMIC_OPERATION_ADD:
177 case ATOMIC_OPERATION_AND:
178 case ATOMIC_OPERATION_OR:
179 case ATOMIC_OPERATION_XOR:
180 return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
181 case ATOMIC_OPERATION_MIN:
182 case ATOMIC_OPERATION_MAX:
183 // multiply by (1-2*(value % 2) to make half of the data negative
184 // this will result in generating large numbers for uint formats
185 return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
// Exchange-style ops use a unique linearized invocation index so each written
// value can be identified during verification.
186 case ATOMIC_OPERATION_EXCHANGE:
187 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
188 return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
//! Maps an atomic operation to the test-case name fragment used in the test hierarchy.
195 static string getAtomicOperationCaseName (const AtomicOperation op)
199 case ATOMIC_OPERATION_ADD: return string("add");
200 case ATOMIC_OPERATION_SUB: return string("sub");
201 case ATOMIC_OPERATION_INC: return string("inc");
202 case ATOMIC_OPERATION_DEC: return string("dec");
203 case ATOMIC_OPERATION_MIN: return string("min");
204 case ATOMIC_OPERATION_MAX: return string("max");
205 case ATOMIC_OPERATION_AND: return string("and");
206 case ATOMIC_OPERATION_OR: return string("or");
207 case ATOMIC_OPERATION_XOR: return string("xor");
208 case ATOMIC_OPERATION_EXCHANGE: return string("exchange");
209 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("compare_exchange");
//! Maps an atomic operation to the GLSL built-in image atomic function name.
//! SUB/INC/DEC have no GLSL built-in and are handled via SPIR-V shaders
//! instead (see isSpirvAtomicOperation), so they have no case here.
216 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
220 case ATOMIC_OPERATION_ADD: return string("imageAtomicAdd");
221 case ATOMIC_OPERATION_MIN: return string("imageAtomicMin");
222 case ATOMIC_OPERATION_MAX: return string("imageAtomicMax");
223 case ATOMIC_OPERATION_AND: return string("imageAtomicAnd");
224 case ATOMIC_OPERATION_OR: return string("imageAtomicOr");
225 case ATOMIC_OPERATION_XOR: return string("imageAtomicXor");
226 case ATOMIC_OPERATION_EXCHANGE: return string("imageAtomicExchange");
227 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("imageAtomicCompSwap");
//! Initial texel value written to the image before the atomic pass, chosen
//! per operation so the end result is verifiable (e.g. large values for
//! SUB/DEC so they do not underflow, all-ones-ish masks for MIN/AND).
234 template <typename T>
235 T getOperationInitialValue (const AtomicOperation op)
239 // \note 18 is just an arbitrary small nonzero value.
240 case ATOMIC_OPERATION_ADD: return 18;
241 case ATOMIC_OPERATION_INC: return 18;
242 case ATOMIC_OPERATION_SUB: return (1 << 24) - 1;
243 case ATOMIC_OPERATION_DEC: return (1 << 24) - 1;
244 case ATOMIC_OPERATION_MIN: return (1 << 15) - 1;
245 case ATOMIC_OPERATION_MAX: return 18;
246 case ATOMIC_OPERATION_AND: return (1 << 15) - 1;
247 case ATOMIC_OPERATION_OR: return 18;
248 case ATOMIC_OPERATION_XOR: return 18;
249 case ATOMIC_OPERATION_EXCHANGE: return 18;
250 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 18;
//! 64-bit specialization: same scheme as the generic version but with values
//! wide enough to exercise the upper 32 bits of the texel.
//! The matching compare value 0xBEFFFFFF18 is hard-coded in
//! computeBinaryAtomicOperationResult() and in the shader compare-swap string.
258 deInt64 getOperationInitialValue<deInt64>(const AtomicOperation op)
262 // \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
263 case ATOMIC_OPERATION_ADD: return 0x000000BEFFFFFF18;
264 case ATOMIC_OPERATION_INC: return 0x000000BEFFFFFF18;
265 case ATOMIC_OPERATION_SUB: return (1ull << 56) - 1;
266 case ATOMIC_OPERATION_DEC: return (1ull << 56) - 1;
267 case ATOMIC_OPERATION_MIN: return (1ull << 47) - 1;
268 case ATOMIC_OPERATION_MAX: return 0x000000BEFFFFFF18;
269 case ATOMIC_OPERATION_AND: return (1ull << 47) - 1;
270 case ATOMIC_OPERATION_OR: return 0x000000BEFFFFFF18;
271 case ATOMIC_OPERATION_XOR: return 0x000000BEFFFFFF18;
272 case ATOMIC_OPERATION_EXCHANGE: return 0x000000BEFFFFFF18;
273 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 0x000000BEFFFFFF18;
// Unreachable fallback (all enum values handled above); converts to -1 as deInt64.
276 return 0xFFFFFFFFFFFFFFFF;
//! Unsigned 64-bit specialization: reuses the signed values bit-for-bit.
281 deUint64 getOperationInitialValue<deUint64>(const AtomicOperation op)
283 return (deUint64)getOperationInitialValue<deInt64>(op);
//! Host-side reference computation of the atomic-call data argument for one
//! invocation. Must produce exactly the same value as the GLSL expression
//! emitted by getAtomicFuncArgumentShaderStr() for the same coordinates.
287 template <typename T>
288 static T getAtomicFuncArgument (const AtomicOperation op,
289 const IVec3& invocationID,
290 const IVec3& gridSize)
292 const T x = static_cast<T>(invocationID.x());
293 const T y = static_cast<T>(invocationID.y());
294 const T z = static_cast<T>(invocationID.z());
298 // \note Fall-throughs.
299 case ATOMIC_OPERATION_ADD:
300 case ATOMIC_OPERATION_SUB:
301 case ATOMIC_OPERATION_AND:
302 case ATOMIC_OPERATION_OR:
303 case ATOMIC_OPERATION_XOR:
304 return x*x + y*y + z*z;
// NOTE(review): INC/DEC case labels appear here but their return statement
// (line 307 in the full file) is not visible in this extract — confirm there.
305 case ATOMIC_OPERATION_INC:
306 case ATOMIC_OPERATION_DEC:
308 case ATOMIC_OPERATION_MIN:
309 case ATOMIC_OPERATION_MAX:
310 // multiply half of the data by -1
311 return (1-2*(x % 2))*(x*x + y*y + z*z);
// Unique linearized invocation index, matching the shader-side expression.
312 case ATOMIC_OPERATION_EXCHANGE:
313 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
314 return (z*static_cast<T>(gridSize.x()) + x)*static_cast<T>(gridSize.y()) + y;
321 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
// Everything except EXCHANGE / COMPARE_EXCHANGE qualifies; those two yield a
// result that depends on which invocation happened to run last.
322 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
324 return op == ATOMIC_OPERATION_ADD ||
325 op == ATOMIC_OPERATION_SUB ||
326 op == ATOMIC_OPERATION_INC ||
327 op == ATOMIC_OPERATION_DEC ||
328 op == ATOMIC_OPERATION_MIN ||
329 op == ATOMIC_OPERATION_MAX ||
330 op == ATOMIC_OPERATION_AND ||
331 op == ATOMIC_OPERATION_OR ||
332 op == ATOMIC_OPERATION_XOR;
335 //! Checks if the operation needs an SPIR-V shader.
// SUB/INC/DEC have no GLSL imageAtomic* built-in, so their shaders are
// provided as hand-written SPIR-V assembly (see vktImageAtomicSpirvShaders.hpp).
336 static bool isSpirvAtomicOperation (const AtomicOperation op)
338 return op == ATOMIC_OPERATION_SUB ||
339 op == ATOMIC_OPERATION_INC ||
340 op == ATOMIC_OPERATION_DEC;
343 //! Returns the SPIR-V assembler name of the given operation.
// Substituted into the ${OPNAME} placeholder of the SPIR-V shader template.
344 static std::string getSpirvAtomicOpName (const AtomicOperation op)
348 case ATOMIC_OPERATION_SUB: return "OpAtomicISub";
349 case ATOMIC_OPERATION_INC: return "OpAtomicIIncrement";
350 case ATOMIC_OPERATION_DEC: return "OpAtomicIDecrement";
358 //! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
// OpAtomicIIncrement/OpAtomicIDecrement take no value operand, so the
// ${LASTARG} template placeholder is specialized to an empty string for them.
359 static bool isSpirvAtomicNoLastArgOp (const AtomicOperation op)
363 case ATOMIC_OPERATION_SUB: return false;
364 case ATOMIC_OPERATION_INC: // fallthrough
365 case ATOMIC_OPERATION_DEC: return true;
373 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
// Host-side reference used to verify shader results. INC/DEC are modelled as
// ADD/SUB since the test drives them with the same argument values.
374 template <typename T>
375 static T computeBinaryAtomicOperationResult (const AtomicOperation op, const T a, const T b)
379 case ATOMIC_OPERATION_INC: // fallthrough.
380 case ATOMIC_OPERATION_ADD: return a + b;
381 case ATOMIC_OPERATION_DEC: // fallthrough.
382 case ATOMIC_OPERATION_SUB: return a - b;
383 case ATOMIC_OPERATION_MIN: return de::min(a, b);
384 case ATOMIC_OPERATION_MAX: return de::max(a, b);
385 case ATOMIC_OPERATION_AND: return a & b;
386 case ATOMIC_OPERATION_OR: return a | b;
387 case ATOMIC_OPERATION_XOR: return a ^ b;
388 case ATOMIC_OPERATION_EXCHANGE: return b;
// Compare value matches getOperationInitialValue(): 0xBEFFFFFF18 for 64-bit
// texels, 18 otherwise — i.e. the swap succeeds only against the initial value.
389 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
//! Image usage flags for the test image: always storage; transfer src/dst
//! only when the result is copied to/from the image with transfer commands.
396 VkImageUsageFlags getUsageFlags (bool useTransfer)
398 VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;
// (The `if (useTransfer)` guard for this line is elided in this extract.)
401 usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
//! Registers the auxiliary compute shaders used when transfer is not available:
//!  - "fillShader": initializes the image from an SSBO ("inputBuffer"),
//!  - "readShader": reads the image back into an SSBO ("outputBuffer"),
//!  - "readShaderResidency": like readShader but additionally cross-checks a
//!    sparse image load (GL_ARB_sparse_texture2) against the plain load,
//!    only added for image types that support sparse residency reads.
//! 64-bit formats pull in the int64 arithmetic/image GLSL extensions.
406 void AddFillReadShader (SourceCollections& sourceCollections,
407 const ImageType& imageType,
408 const tcu::TextureFormat& format,
409 const string& componentType,
410 const string& vec4Type)
412 const string imageInCoord = getCoordStr(imageType, "gx", "gy", "gz");
413 const string shaderImageFormatStr = getShaderImageFormatQualifier(format);
414 const string shaderImageTypeStr = getShaderImageType(format, imageType);
415 const auto componentWidth = getFormatComponentWidth(mapTextureFormat(format), 0u);
416 const string extensions = ((componentWidth == 64u)
417 ? "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
418 "#extension GL_EXT_shader_image_int64 : require\n"
// Compute shader writing one texel per invocation from the input SSBO.
422 const string fillShader = "#version 450\n"
424 "precision highp " + shaderImageTypeStr + ";\n"
426 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
427 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
429 "layout(std430, binding = 1) buffer inputBuffer\n"
431 " "+ componentType + " data[];\n"
436 " int gx = int(gl_GlobalInvocationID.x);\n"
437 " int gy = int(gl_GlobalInvocationID.y);\n"
438 " int gz = int(gl_GlobalInvocationID.z);\n"
439 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
440 " imageStore(u_resultImage, " + imageInCoord + ", " + vec4Type + "(inBuffer.data[index]));\n"
// Compute shader reading one texel per invocation into the output SSBO.
443 const string readShader = "#version 450\n"
445 "precision highp " + shaderImageTypeStr + ";\n"
447 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
448 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
450 "layout(std430, binding = 1) buffer outputBuffer\n"
452 " " + componentType + " data[];\n"
457 " int gx = int(gl_GlobalInvocationID.x);\n"
458 " int gy = int(gl_GlobalInvocationID.y);\n"
459 " int gz = int(gl_GlobalInvocationID.z);\n"
460 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
461 " outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
// Sparse residency reads are not supported for 1D, 1D-array and buffer images.
465 if ((imageType != IMAGE_TYPE_1D) &&
466 (imageType != IMAGE_TYPE_1D_ARRAY) &&
467 (imageType != IMAGE_TYPE_BUFFER))
469 const string readShaderResidency = "#version 450\n"
470 "#extension GL_ARB_sparse_texture2 : require\n"
472 "precision highp " + shaderImageTypeStr + ";\n"
474 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
475 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
477 "layout(std430, binding = 1) buffer outputBuffer\n"
479 " " + componentType + " data[];\n"
484 " int gx = int(gl_GlobalInvocationID.x);\n"
485 " int gy = int(gl_GlobalInvocationID.y);\n"
486 " int gz = int(gl_GlobalInvocationID.z);\n"
487 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
488 " outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
489 " " + vec4Type + " sparseValue;\n"
490 " sparseImageLoadARB(u_resultImage, " + imageInCoord + ", sparseValue);\n"
// Poison the output with a sentinel (1234) if the sparse load disagrees.
491 " if (outBuffer.data[index] != sparseValue.x)\n"
492 " outBuffer.data[index] = " + vec4Type + "(1234).x;\n"
495 sourceCollections.glslSources.add("readShaderResidency") << glu::ComputeSource(readShaderResidency.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
498 sourceCollections.glslSources.add("fillShader") << glu::ComputeSource(fillShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
499 sourceCollections.glslSources.add("readShader") << glu::ComputeSource(readShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
502 //! Prepare the initial data for the image
// Writes the per-operation initial value into every texel of a host-visible
// staging buffer wrapped by a PixelBufferAccess, then flushes it to the device.
// 64-bit formats are written via raw pointers because PixelBufferAccess
// setPixel() takes 32-bit IVec4 components.
503 static void initDataForImage (const VkDevice device,
504 const DeviceInterface& deviceInterface,
505 const TextureFormat& format,
506 const AtomicOperation operation,
507 const tcu::UVec3& gridSize,
510 Allocation& bufferAllocation = buffer.getAllocation();
511 const VkFormat imageFormat = mapTextureFormat(format);
512 tcu::PixelBufferAccess pixelBuffer (format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());
514 if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
516 const deInt64 initialValue(getOperationInitialValue<deInt64>(operation));
518 for (deUint32 z = 0; z < gridSize.z(); z++)
519 for (deUint32 y = 0; y < gridSize.y(); y++)
520 for (deUint32 x = 0; x < gridSize.x(); x++)
522 *((deInt64*)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
// 32-bit (and narrower) formats: broadcast the scalar to all four components.
527 const tcu::IVec4 initialValue(getOperationInitialValue<deInt32>(operation));
529 for (deUint32 z = 0; z < gridSize.z(); z++)
530 for (deUint32 y = 0; y < gridSize.y(); y++)
531 for (deUint32 x = 0; x < gridSize.x(); x++)
533 pixelBuffer.setPixel(initialValue, x, y, z);
// Make the host writes visible to the device before the upload/bind.
537 flushAlloc(deviceInterface, device, bufferAllocation);
//! Shared checkSupport() for both test-case classes. Throws NotSupportedError
//! (or TCU_FAIL for mandatory features) if the device cannot run the variant:
//! texel-buffer atomics, cube arrays, sparse backing, float atomics
//! (VK_EXT_shader_atomic_float), 64-bit image atomics
//! (VK_EXT_shader_image_atomic_int64), transfer features and sparse reads.
540 void commonCheckSupport (Context& context, const tcu::TextureFormat& tcuFormat, ImageType imageType, AtomicOperation operation, bool useTransfer, ShaderReadType readType, ImageBackingType backingType)
542 const VkFormat format = mapTextureFormat(tcuFormat);
543 const VkImageType vkImgType = mapImageType(imageType);
544 const VkFormatFeatureFlags texelBufferSupport = (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);
545 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(context.getInstanceInterface(),
546 context.getPhysicalDevice(), format);
548 if ((imageType == IMAGE_TYPE_BUFFER) &&
549 ((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
550 TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");
552 if (imageType == IMAGE_TYPE_CUBE_ARRAY)
553 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);
555 if (backingType == ImageBackingType::SPARSE)
557 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
// Residency feature depends on the image dimensionality (switch header elided
// in this extract).
561 case VK_IMAGE_TYPE_2D: context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D); break;
562 case VK_IMAGE_TYPE_3D: context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D); break;
563 default: DE_ASSERT(false); break;
566 if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format, vkImgType, VK_SAMPLE_COUNT_1_BIT, getUsageFlags(useTransfer), VK_IMAGE_TILING_OPTIMAL))
567 TCU_THROW(NotSupportedError, "Format does not support sparse images");
570 if (isFloatFormat(format))
572 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
574 const VkFormatFeatureFlags requiredFeatures = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
575 const auto& atomicFloatFeatures = context.getShaderAtomicFloatFeaturesEXT();
577 if (!atomicFloatFeatures.shaderImageFloat32Atomics)
578 TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");
580 if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
581 TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");
// Hard failure (not NotSupported): storage-image atomic support is mandatory
// for float formats once the extension feature bits are present.
583 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
584 TCU_FAIL("Required format feature bits not supported");
586 if (backingType == ImageBackingType::SPARSE)
588 if (!atomicFloatFeatures.sparseImageFloat32Atomics)
589 TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");
591 if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
592 TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
596 else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
598 context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");
600 const VkFormatFeatureFlags requiredFeatures = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
601 const auto& atomicInt64Features = context.getShaderImageAtomicInt64FeaturesEXT();
603 if (!atomicInt64Features.shaderImageInt64Atomics)
604 TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");
606 if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
607 TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");
609 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
610 TCU_FAIL("Mandatory format features not supported");
// Transfer path needs src+dst transfer format features (guard for useTransfer
// elided in this extract).
615 const VkFormatFeatureFlags transferFeatures = (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
616 if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
617 TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
620 if (readType == ShaderReadType::SPARSE)
622 DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
623 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
// Test case that verifies the END RESULT stored in the image after all
// invocations have performed their atomic operation (vs. the companion
// BinaryAtomicIntermValuesCase, which checks the values returned by each
// atomic call).
627 class BinaryAtomicEndResultCase : public vkt::TestCase
630 BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
632 const string& description,
633 const ImageType imageType,
634 const tcu::UVec3& imageSize,
635 const tcu::TextureFormat& format,
636 const AtomicOperation operation,
637 const bool useTransfer,
638 const ShaderReadType shaderReadType,
639 const ImageBackingType backingType,
640 const glu::GLSLVersion glslVersion);
642 void initPrograms (SourceCollections& sourceCollections) const;
643 TestInstance* createInstance (Context& context) const;
644 virtual void checkSupport (Context& context) const;
// Immutable test parameters captured at construction time.
647 const ImageType m_imageType;
648 const tcu::UVec3 m_imageSize;
649 const tcu::TextureFormat m_format;
650 const AtomicOperation m_operation;
651 const bool m_useTransfer;
652 const ShaderReadType m_readType;
653 const ImageBackingType m_backingType;
654 const glu::GLSLVersion m_glslVersion;
// Constructor: forwards name/description to TestCase and stores all variant
// parameters. (The m_format initializer on line 671 of the full file is not
// visible in this extract.)
657 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
659 const string& description,
660 const ImageType imageType,
661 const tcu::UVec3& imageSize,
662 const tcu::TextureFormat& format,
663 const AtomicOperation operation,
664 const bool useTransfer,
665 const ShaderReadType shaderReadType,
666 const ImageBackingType backingType,
667 const glu::GLSLVersion glslVersion)
668 : TestCase (testCtx, name, description)
669 , m_imageType (imageType)
670 , m_imageSize (imageSize)
672 , m_operation (operation)
673 , m_useTransfer (useTransfer)
674 , m_readType (shaderReadType)
675 , m_backingType (backingType)
676 , m_glslVersion (glslVersion)
// Delegates all capability checks to the shared helper.
680 void BinaryAtomicEndResultCase::checkSupport (Context& context) const
682 commonCheckSupport(context, m_format, m_imageType, m_operation, m_useTransfer, m_readType, m_backingType);
// Generates the compute shader that performs NUM_INVOCATIONS_PER_PIXEL atomic
// operations per texel (invocations with the same gx % gridSize.x hit the same
// texel) and discards the returned values — only the image end state is
// checked. SPIR-V-only ops (SUB/INC/DEC) come from a template instead of GLSL.
685 void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
687 const VkFormat imageFormat = mapTextureFormat(m_format);
688 const deUint32 componentWidth = getFormatComponentWidth(imageFormat, 0);
689 const bool intFormat = isIntFormat(imageFormat);
690 const bool uintFormat = isUintFormat(imageFormat);
691 const bool floatFormat = isFloatFormat(imageFormat);
692 const string type = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
693 const string vec4Type = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
// Helper fill/read shaders are always registered (used on the no-transfer path).
695 AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
697 if (isSpirvAtomicOperation(m_operation))
699 const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
700 const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
701 std::map<std::string, std::string> specializations;
703 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
704 if (isSpirvAtomicNoLastArgOp(m_operation))
705 specializations["LASTARG"] = "";
707 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
// GLSL path for ops with an imageAtomic* built-in.
711 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
713 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
714 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
716 const string atomicArgExpr = type + getAtomicFuncArgumentShaderStr(m_operation,
718 IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
// CompSwap needs an extra compare argument matching the initial texel value
// (decimal 820338753304 == 0xBEFFFFFF18 for 64-bit formats).
720 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
721 (componentWidth == 64 ?", 820338753304": ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "")
723 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
724 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
725 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
726 const string extensions = "#extension GL_EXT_shader_atomic_float : enable\n#extension GL_KHR_memory_scope_semantics : enable ";
728 string source = versionDecl + "\n" + extensions + "\n";
730 if (64 == componentWidth)
732 source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
733 "#extension GL_EXT_shader_image_int64 : require\n";
736 source += "precision highp " + shaderImageTypeStr + ";\n"
738 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
739 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
743 " int gx = int(gl_GlobalInvocationID.x);\n"
744 " int gy = int(gl_GlobalInvocationID.y);\n"
745 " int gz = int(gl_GlobalInvocationID.z);\n"
746 " " + atomicInvocation + ";\n"
749 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
// Test case that verifies the INTERMEDIATE values returned by each atomic
// call (written to a second image) rather than the final image contents —
// companion to BinaryAtomicEndResultCase.
753 class BinaryAtomicIntermValuesCase : public vkt::TestCase
756 BinaryAtomicIntermValuesCase (tcu::TestContext& testCtx,
758 const string& description,
759 const ImageType imageType,
760 const tcu::UVec3& imageSize,
761 const tcu::TextureFormat& format,
762 const AtomicOperation operation,
763 const bool useTransfer,
764 const ShaderReadType shaderReadType,
765 const ImageBackingType backingType,
766 const glu::GLSLVersion glslVersion);
768 void initPrograms (SourceCollections& sourceCollections) const;
769 TestInstance* createInstance (Context& context) const;
770 virtual void checkSupport (Context& context) const;
// Immutable test parameters captured at construction time.
773 const ImageType m_imageType;
774 const tcu::UVec3 m_imageSize;
775 const tcu::TextureFormat m_format;
776 const AtomicOperation m_operation;
777 const bool m_useTransfer;
778 const ShaderReadType m_readType;
779 const ImageBackingType m_backingType;
780 const glu::GLSLVersion m_glslVersion;
// Constructor: forwards name/description to TestCase and stores all variant
// parameters. (The m_format initializer on line 797 of the full file is not
// visible in this extract.)
783 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext& testCtx,
785 const string& description,
786 const ImageType imageType,
787 const tcu::UVec3& imageSize,
788 const TextureFormat& format,
789 const AtomicOperation operation,
790 const bool useTransfer,
791 const ShaderReadType shaderReadType,
792 const ImageBackingType backingType,
793 const glu::GLSLVersion glslVersion)
794 : TestCase (testCtx, name, description)
795 , m_imageType (imageType)
796 , m_imageSize (imageSize)
798 , m_operation (operation)
799 , m_useTransfer (useTransfer)
800 , m_readType (shaderReadType)
801 , m_backingType (backingType)
802 , m_glslVersion (glslVersion)
// Delegates all capability checks to the shared helper.
806 void BinaryAtomicIntermValuesCase::checkSupport (Context& context) const
808 commonCheckSupport(context, m_format, m_imageType, m_operation, m_useTransfer, m_readType, m_backingType);
// Generates the compute shader for the intermediate-values variant: identical
// atomic invocation to the end-result case, but the value RETURNED by the
// atomic is stored to a second, per-invocation image (u_intermValuesImage,
// binding 1) for later verification.
811 void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
813 const VkFormat imageFormat = mapTextureFormat(m_format);
814 const deUint32 componentWidth = getFormatComponentWidth(imageFormat, 0);
815 const bool intFormat = isIntFormat(imageFormat);
816 const bool uintFormat = isUintFormat(imageFormat);
817 const bool floatFormat = isFloatFormat(imageFormat);
818 const string type = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
819 const string vec4Type = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
821 AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
// SPIR-V template path for SUB/INC/DEC, specialized for intermediate results.
823 if (isSpirvAtomicOperation(m_operation))
825 const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
826 const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
827 std::map<std::string, std::string> specializations;
829 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
830 if (isSpirvAtomicNoLastArgOp(m_operation))
831 specializations["LASTARG"] = "";
833 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
837 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
838 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
839 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
// Unlike the end-result case, each invocation also has a unique output coord.
840 const string invocationCoord = getCoordStr(m_imageType, "gx", "gy", "gz");
841 const string atomicArgExpr = type + getAtomicFuncArgumentShaderStr(m_operation,
843 IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
// CompSwap compare value mirrors the initial texel value (0xBEFFFFFF18 == 820338753304).
845 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
846 (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "") :
848 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) +
849 "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
850 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
851 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
852 const string extensions = "#extension GL_EXT_shader_atomic_float : enable\n#extension GL_KHR_memory_scope_semantics : enable ";
854 string source = versionDecl + "\n" + extensions + "\n"
857 if (64 == componentWidth)
859 source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
860 "#extension GL_EXT_shader_image_int64 : require\n";
863 source += "precision highp " + shaderImageTypeStr + "; \n"
864 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
865 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
866 "layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
870 " int gx = int(gl_GlobalInvocationID.x);\n"
871 " int gy = int(gl_GlobalInvocationID.y);\n"
872 " int gz = int(gl_GlobalInvocationID.z);\n"
873 " imageStore(u_intermValuesImage, " + invocationCoord + ", " + vec4Type + "(" + atomicInvocation + "));\n"
876 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
// Common test-instance base for both variants. Owns the Vulkan resources
// (input/output buffers, result image + view, descriptors) and drives the
// shared iterate() flow; subclasses customize buffer sizing, resource and
// descriptor setup, pre/post-compute commands and result verification via
// the pure-virtual hooks below.
880 class BinaryAtomicInstanceBase : public vkt::TestInstance
884 BinaryAtomicInstanceBase (Context& context,
886 const ImageType imageType,
887 const tcu::UVec3& imageSize,
888 const TextureFormat& format,
889 const AtomicOperation operation,
890 const bool useTransfer,
891 const ShaderReadType shaderReadType,
892 const ImageBackingType backingType);
894 tcu::TestStatus iterate (void);
// --- Hooks implemented by the end-result / intermediate-values subclasses ---
896 virtual deUint32 getOutputBufferSize (void) const = 0;
898 virtual void prepareResources (const bool useTransfer) = 0;
899 virtual void prepareDescriptors (const bool isTexelBuffer) = 0;
901 virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const = 0;
902 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
903 const VkPipeline pipeline,
904 const VkPipelineLayout pipelineLayout,
905 const VkDescriptorSet descriptorSet,
906 const VkDeviceSize& range,
907 const bool useTransfer) = 0;
909 virtual bool verifyResult (Allocation& outputBufferAllocation,
910 const bool is64Bit) const = 0;
// --- Shared helpers for the no-transfer (compute fill/read) path ---
914 void shaderFillImage (const VkCommandBuffer cmdBuffer,
915 const VkBuffer& buffer,
916 const VkPipeline pipeline,
917 const VkPipelineLayout pipelineLayout,
918 const VkDescriptorSet descriptorSet,
919 const VkDeviceSize& range,
920 const tcu::UVec3& gridSize);
922 void createImageAndView (VkFormat imageFormat,
923 const tcu::UVec3& imageExent,
925 de::MovePtr<Image>& imagePtr,
926 Move<VkImageView>& imageViewPtr);
928 void createImageResources (const VkFormat& imageFormat,
929 const bool useTransfer);
// Test parameters (mirroring the TestCase members).
932 const ImageType m_imageType;
933 const tcu::UVec3 m_imageSize;
934 const TextureFormat m_format;
935 const AtomicOperation m_operation;
936 const bool m_useTransfer;
937 const ShaderReadType m_readType;
938 const ImageBackingType m_backingType;
// Vulkan objects owned by the instance; Move/MovePtr give RAII cleanup.
940 de::MovePtr<Buffer> m_inputBuffer;
941 de::MovePtr<Buffer> m_outputBuffer;
942 Move<VkBufferView> m_descResultBufferView;
943 Move<VkBufferView> m_descIntermResultsBufferView;
944 Move<VkDescriptorPool> m_descriptorPool;
945 Move<VkDescriptorSetLayout> m_descriptorSetLayout;
946 Move<VkDescriptorSet> m_descriptorSet;
// Separate layout/pool used by the fill/read shaders on the no-transfer path.
948 Move<VkDescriptorSetLayout> m_descriptorSetLayoutNoTransfer;
949 Move<VkDescriptorPool> m_descriptorPoolNoTransfer;
951 de::MovePtr<Image> m_resultImage;
952 Move<VkImageView> m_resultImageView;
// Semaphores the queue submission must wait on (sparse binding signals these).
954 std::vector<VkSemaphore> m_waitSemaphores;
// Constructor: stores all variant parameters. (The m_format initializer on
// line 970 of the full file is not visible in this extract.)
957 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context& context,
959 const ImageType imageType,
960 const tcu::UVec3& imageSize,
961 const TextureFormat& format,
962 const AtomicOperation operation,
963 const bool useTransfer,
964 const ShaderReadType shaderReadType,
965 const ImageBackingType backingType)
966 : vkt::TestInstance (context)
968 , m_imageType (imageType)
969 , m_imageSize (imageSize)
971 , m_operation (operation)
972 , m_useTransfer (useTransfer)
973 , m_readType (shaderReadType)
974 , m_backingType (backingType)
// Main test body. Flow:
//  1. Create the result image and host-visible input/output buffers.
//  2. Initialize the image either via buffer-to-image copy (useTransfer) or a
//     "fillShader" compute dispatch.
//  3. Dispatch the atomic-operation shader NUM_INVOCATIONS_PER_PIXEL times per
//     pixel (extended X dimension).
//  4. Read results back (transfer copy or read shader) and verify on the host.
978 tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
980 const VkDevice device = m_context.getDevice();
981 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
982 const VkQueue queue = m_context.getUniversalQueue();
983 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
984 Allocator& allocator = m_context.getDefaultAllocator();
985 const VkDeviceSize imageSizeInBytes = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
986 const VkDeviceSize outBuffSizeInBytes = getOutputBufferSize();
987 const VkFormat imageFormat = mapTextureFormat(m_format);
988 const bool isTexelBuffer = (m_imageType == IMAGE_TYPE_BUFFER);
992 createImageResources(imageFormat, m_useTransfer);
995 tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
997 //Prepare the buffer with the initial data for the image
// Texel-buffer cases also need STORAGE_TEXEL_BUFFER usage so the buffer itself
// can be bound as the storage image substitute.
998 m_inputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface,
1001 makeBufferCreateInfo(imageSizeInBytes,
1002 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
1003 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1004 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
1005 MemoryRequirement::HostVisible));
1007 // Fill in buffer with initial data used for image.
1008 initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);
1010 // Create a buffer to store shader output copied from result image
1011 m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface,
1014 makeBufferCreateInfo(outBuffSizeInBytes,
1015 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1016 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1017 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
1018 MemoryRequirement::HostVisible));
1022 prepareResources(m_useTransfer);
1025 prepareDescriptors(isTexelBuffer);
// Pipelines/descriptors for the shader-based fill and read paths (only built
// when transfer-based init/readback is not used).
1027 Move<VkDescriptorSet> descriptorSetFillImage;
1028 Move<VkShaderModule> shaderModuleFillImage;
1029 Move<VkPipelineLayout> pipelineLayoutFillImage;
1030 Move<VkPipeline> pipelineFillImage;
1032 Move<VkDescriptorSet> descriptorSetReadImage;
1033 Move<VkShaderModule> shaderModuleReadImage;
1034 Move<VkPipelineLayout> pipelineLayoutReadImage;
1035 Move<VkPipeline> pipelineReadImage;
// Shared layout for fill/read shaders: binding 0 = storage image (or texel
// buffer), binding 1 = storage buffer.
1039 m_descriptorSetLayoutNoTransfer =
1040 DescriptorSetLayoutBuilder()
1041 .addSingleBinding((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), VK_SHADER_STAGE_COMPUTE_BIT)
1042 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1043 .build(deviceInterface, device);
// Pool sized for two sets: one for the fill pass, one for the read pass.
1045 m_descriptorPoolNoTransfer =
1046 DescriptorPoolBuilder()
1047 .addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), 2)
1048 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
1049 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);
1051 descriptorSetFillImage = makeDescriptorSet(deviceInterface,
1053 *m_descriptorPoolNoTransfer,
1054 *m_descriptorSetLayoutNoTransfer);
1056 descriptorSetReadImage = makeDescriptorSet(deviceInterface,
1058 *m_descriptorPoolNoTransfer,
1059 *m_descriptorSetLayoutNoTransfer);
1061 shaderModuleFillImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
1062 pipelineLayoutFillImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1063 pipelineFillImage = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);
// Sparse-residency reads use a dedicated shader variant.
1065 if (m_readType == ShaderReadType::SPARSE)
1067 shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShaderResidency"), 0);
1071 shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
1073 pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
// NOTE(review): the read pipeline is created with pipelineLayoutFillImage
// instead of pipelineLayoutReadImage. Both layouts are built from the same
// m_descriptorSetLayoutNoTransfer (lines 1062/1073) so they are compatible,
// but confirm whether this was intentional.
1074 pipelineReadImage = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleReadImage);
// Main atomic-operation pipeline; the shader is looked up by the test name.
1078 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
1079 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
1080 const Unique<VkPipeline> pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
1082 // Create command buffer
1083 const Unique<VkCommandPool> cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
1084 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1086 beginCommandBuffer(deviceInterface, *cmdBuffer);
// Transfer-based initialization: copy the prepared input buffer into the image.
1092 const vector<VkBufferImageCopy> bufferImageCopy(1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize)));
1093 copyBufferToImage(deviceInterface,
1098 VK_IMAGE_ASPECT_COLOR_BIT,
1100 getNumLayers(m_imageType, m_imageSize), m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
// Shader-based initialization path.
1104 shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage, *descriptorSetFillImage, imageSizeInBytes, gridSize);
1106 commandsBeforeCompute(*cmdBuffer);
1109 deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1110 deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
// X dimension is extended so each pixel is hit by NUM_INVOCATIONS_PER_PIXEL
// invocations performing the atomic operation.
1112 deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1114 commandsAfterCompute(*cmdBuffer,
1116 *pipelineLayoutReadImage,
1117 *descriptorSetReadImage,
// Make the output buffer writes (transfer or shader, depending on readback
// path) visible to host reads before verification.
1121 const VkBufferMemoryBarrier outputBufferPreHostReadBarrier
1122 = makeBufferMemoryBarrier(((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
1123 VK_ACCESS_HOST_READ_BIT,
1124 m_outputBuffer->get(),
1126 outBuffSizeInBytes);
1128 deviceInterface.cmdPipelineBarrier(*cmdBuffer,
1129 ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
1130 VK_PIPELINE_STAGE_HOST_BIT,
1131 DE_FALSE, 0u, DE_NULL,
1132 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
1134 endCommandBuffer(deviceInterface, *cmdBuffer);
// Wait on sparse-binding semaphores (if any) before executing the commands.
1136 std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
1137 submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
1138 static_cast<deUint32>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores), de::dataOrNull(waitStages));
1140 Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();
1142 invalidateAlloc(deviceInterface, device, outputBufferAllocation);
// 64-bit verification applies only to the R64 integer formats.
1144 if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
1145 return tcu::TestStatus::pass("Comparison succeeded");
1147 return tcu::TestStatus::fail("Comparison failed");
// Records commands that initialize m_resultImage from `buffer` via the fill
// compute shader: update descriptors, transition the image UNDEFINED->GENERAL,
// dispatch one invocation per pixel, then barrier shader writes to later
// shader reads.
1150 void BinaryAtomicInstanceBase::shaderFillImage (const VkCommandBuffer cmdBuffer,
1151 const VkBuffer& buffer,
1152 const VkPipeline pipeline,
1153 const VkPipelineLayout pipelineLayout,
1154 const VkDescriptorSet descriptorSet,
1155 const VkDeviceSize& range,
1156 const tcu::UVec3& gridSize)
1158 const VkDevice device = m_context.getDevice();
1159 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1160 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1161 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(buffer, 0, range);
1162 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
// Binding 0: destination storage image; binding 1: source data buffer.
1164 DescriptorSetUpdateBuilder()
1165 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1166 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1167 .update(deviceInterface, device)
// Transition the freshly-created image to GENERAL for storage-image writes.
1169 const VkImageMemoryBarrier imageBarrierPre = makeImageMemoryBarrier(0,
1170 VK_ACCESS_SHADER_WRITE_BIT,
1171 VK_IMAGE_LAYOUT_UNDEFINED,
1172 VK_IMAGE_LAYOUT_GENERAL,
1173 m_resultImage->get(),
1176 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1177 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1178 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1179 (VkDependencyFlags)0,
1180 0, (const VkMemoryBarrier*)DE_NULL,
1181 0, (const VkBufferMemoryBarrier*)DE_NULL,
1182 1, &imageBarrierPre);
1184 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1185 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
// One invocation per pixel (no NUM_INVOCATIONS_PER_PIXEL extension here).
1187 deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());
// Make the fill writes visible to the subsequent atomic compute pass.
1189 const VkImageMemoryBarrier imageBarrierPost = makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
1190 VK_ACCESS_SHADER_READ_BIT,
1191 VK_IMAGE_LAYOUT_GENERAL,
1192 VK_IMAGE_LAYOUT_GENERAL,
1193 m_resultImage->get(),
1196 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1197 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1198 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1199 (VkDependencyFlags)0,
1200 0, (const VkMemoryBarrier*)DE_NULL,
1201 0, (const VkBufferMemoryBarrier*)DE_NULL,
1202 1, &imageBarrierPost);
// Creates an image of the given format/extent plus a matching view.
// Sparse-backed tests add sparse creation flags, may switch to CONCURRENT
// sharing when sparse and universal queues differ, and record the sparse
// binding semaphore in m_waitSemaphores for the later submit.
1205 void BinaryAtomicInstanceBase::createImageAndView (VkFormat imageFormat,
1206 const tcu::UVec3& imageExent,
1208 de::MovePtr<Image>& imagePtr,
1209 Move<VkImageView>& imageViewPtr)
1211 const VkDevice device = m_context.getDevice();
1212 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1213 Allocator& allocator = m_context.getDefaultAllocator();
1214 const VkImageUsageFlags usageFlags = getUsageFlags(useTransfer);
1215 VkImageCreateFlags createFlags = 0u;
// Cube and cube-array views require a cube-compatible image.
1217 if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
1218 createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
1220 const auto numLayers = getNumLayers(m_imageType, m_imageSize);
1222 VkImageCreateInfo createInfo =
1224 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1225 DE_NULL, // const void* pNext;
1226 createFlags, // VkImageCreateFlags flags;
1227 mapImageType(m_imageType), // VkImageType imageType;
1228 imageFormat, // VkFormat format;
1229 makeExtent3D(imageExent), // VkExtent3D extent;
1230 1u, // deUint32 mipLevels;
1231 numLayers, // deUint32 arrayLayers;
1232 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
1233 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
1234 usageFlags, // VkImageUsageFlags usage;
1235 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1236 0u, // deUint32 queueFamilyIndexCount;
1237 DE_NULL, // const deUint32* pQueueFamilyIndices;
1238 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
1241 if (m_backingType == ImageBackingType::SPARSE)
1243 const auto& vki = m_context.getInstanceInterface();
1244 const auto physicalDevice = m_context.getPhysicalDevice();
1245 const auto sparseQueue = m_context.getSparseQueue();
1246 const auto sparseQueueIdx = m_context.getSparseQueueFamilyIndex();
1247 const auto universalQIdx = m_context.getUniversalQueueFamilyIndex();
1248 const deUint32 queueIndices[] = { universalQIdx, sparseQueueIdx };
1250 createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
// Concurrent sharing is only needed when the sparse-binding queue family
// differs from the universal queue family that executes the commands.
1252 if (sparseQueueIdx != universalQIdx)
1254 createInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
1255 createInfo.queueFamilyIndexCount = static_cast<deUint32>(DE_LENGTH_OF_ARRAY(queueIndices));
1256 createInfo.pQueueFamilyIndices = queueIndices;
// SparseImage binds its memory on the sparse queue; the submit in iterate()
// waits on the semaphore recorded here.
1259 const auto sparseImage = new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
1260 m_waitSemaphores.push_back(sparseImage->getSemaphore());
1261 imagePtr = de::MovePtr<Image>(sparseImage);
1264 imagePtr = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));
1266 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);
1268 imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat, subresourceRange);
// Creates the result image and its view (one layer-sized image covering all
// layers of the tested image type).
1271 void BinaryAtomicInstanceBase::createImageResources (const VkFormat& imageFormat,
1272 const bool useTransfer)
1274 //Create the image that is going to store results of atomic operations
1275 createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage, m_resultImageView);
// Test instance that checks only the END RESULT of the atomic operation
// sequence stored in the image (intermediate return values are not captured).
1278 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1282 BinaryAtomicEndResultInstance (Context& context,
1284 const ImageType imageType,
1285 const tcu::UVec3& imageSize,
1286 const TextureFormat& format,
1287 const AtomicOperation operation,
1288 const bool useTransfer,
1289 const ShaderReadType shaderReadType,
1290 const ImageBackingType backingType)
1291 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation, useTransfer, shaderReadType, backingType) {}
// Output buffer holds exactly one texel per pixel.
1293 virtual deUint32 getOutputBufferSize (void) const;
// No extra resources needed beyond the base-class result image.
1295 virtual void prepareResources (const bool useTransfer) { DE_UNREF(useTransfer); }
1296 virtual void prepareDescriptors (const bool isTexelBuffer);
// Result image is initialized before compute, so nothing to record here.
1298 virtual void commandsBeforeCompute (const VkCommandBuffer) const {}
// Copies (or shader-reads) the result image into the output buffer.
1299 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1300 const VkPipeline pipeline,
1301 const VkPipelineLayout pipelineLayout,
1302 const VkDescriptorSet descriptorSet,
1303 const VkDeviceSize& range,
1304 const bool useTransfer);
1306 virtual bool verifyResult (Allocation& outputBufferAllocation,
1307 const bool is64Bit) const;
// Recomputes the expected end-result for one pixel on the host and compares.
1311 template <typename T>
1312 bool isValueCorrect (const T resultValue,
1316 const UVec3& gridSize,
1317 const IVec3 extendedGridSize) const;
// One texel per pixel: pixel size times total pixel count.
1320 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
1322 return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
// Builds the single-binding descriptor set for the atomic shader: a storage
// texel buffer view (buffer-backed images) or the result image view.
1325 void BinaryAtomicEndResultInstance::prepareDescriptors (const bool isTexelBuffer)
1327 const VkDescriptorType descriptorType = isTexelBuffer ?
1328 VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1329 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1330 const VkDevice device = m_context.getDevice();
1331 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1333 m_descriptorSetLayout =
1334 DescriptorSetLayoutBuilder()
1335 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1336 .build(deviceInterface, device);
1339 DescriptorPoolBuilder()
1340 .addType(descriptorType)
1341 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1343 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
// Texel-buffer path: atomics operate directly on the input buffer via a view.
1347 m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1349 DescriptorSetUpdateBuilder()
1350 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1351 .update(deviceInterface, device);
// Image path: bind the result image in GENERAL layout as a storage image.
1355 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1357 DescriptorSetUpdateBuilder()
1358 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1359 .update(deviceInterface, device);
// Records readback of the end result into m_outputBuffer. Three paths:
//  - buffer-backed image: the input buffer already holds the result, reuse it;
//  - useTransfer: barrier + image-to-buffer copy;
//  - otherwise: barrier + "read" compute pipeline dispatched per layer/pixel.
1363 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1364 const VkPipeline pipeline,
1365 const VkPipelineLayout pipelineLayout,
1366 const VkDescriptorSet descriptorSet,
1367 const VkDeviceSize& range,
1368 const bool useTransfer)
1370 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1371 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1372 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1374 if (m_imageType == IMAGE_TYPE_BUFFER)
// Atomics were applied directly to the input buffer; make it the output.
// NOTE(review): de::MovePtr assignment — this transfers ownership from
// m_inputBuffer, leaving it empty afterwards.
1376 m_outputBuffer = m_inputBuffer;
1378 else if (useTransfer)
// Transition for transfer-read and copy all layers into the output buffer.
1380 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1381 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1382 VK_ACCESS_TRANSFER_READ_BIT,
1383 VK_IMAGE_LAYOUT_GENERAL,
1384 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1385 m_resultImage->get(),
1388 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1389 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1390 VK_PIPELINE_STAGE_TRANSFER_BIT,
1391 DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1392 1u, &resultImagePostDispatchBarrier);
1394 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));
1396 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
// Shader-readback path: bind image + output buffer and dispatch read shader.
1400 const VkDevice device = m_context.getDevice();
1401 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1402 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1404 DescriptorSetUpdateBuilder()
1405 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1406 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1407 .update(deviceInterface, device);
// Atomic shader writes must be visible to the read shader's image reads.
1409 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1410 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1411 VK_ACCESS_SHADER_READ_BIT,
1412 VK_IMAGE_LAYOUT_GENERAL,
1413 VK_IMAGE_LAYOUT_GENERAL,
1414 m_resultImage->get(),
1417 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1418 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1419 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1420 DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1421 1u, &resultImagePostDispatchBarrier);
1423 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1424 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
// Layered image types map layers onto a dispatch dimension.
1426 switch (m_imageType)
1428 case IMAGE_TYPE_1D_ARRAY:
1429 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
1431 case IMAGE_TYPE_2D_ARRAY:
1432 case IMAGE_TYPE_CUBE:
1433 case IMAGE_TYPE_CUBE_ARRAY:
1434 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1437 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
// Host-side verification of the end result per pixel:
//  - order-independent operations (add/and/or/xor/min/max): recompute the
//    exact expected value with isValueCorrect;
//  - exchange / compare-exchange: accept the result if it equals any of the
//    NUM_INVOCATIONS_PER_PIXEL atomic arguments (final writer is unordered).
1443 bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation,
1444 const bool is64Bit) const
1446 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1447 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1449 tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
1451 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1452 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1453 for (deInt32 x = 0; x < resultBuffer.getWidth(); x++)
1455 const void* resultValue = resultBuffer.getPixelPtr(x, y, z);
1456 deUint32 floatToUnsignedValue = 0;
1457 bool isFloatValue = false;
1458 if (isFloatFormat(mapTextureFormat(m_format)))
1460 isFloatValue = true;
// Float results are compared through their truncated unsigned value.
1461 floatToUnsignedValue = static_cast<deUint32>(*((float*)resultValue));
1464 if (isOrderIndependentAtomicOperation(m_operation))
// Pick the comparison type matching the image format (unsigned/signed,
// 32/64-bit, or float-via-unsigned).
1466 if (isUintFormat(mapTextureFormat(m_format)))
1470 if (!isValueCorrect<deUint64>(*((deUint64*)resultValue), x, y, z, gridSize, extendedGridSize))
1475 if (!isValueCorrect<deUint32>(*((deUint32*)resultValue), x, y, z, gridSize, extendedGridSize))
1479 else if (isIntFormat(mapTextureFormat(m_format)))
1483 if (!isValueCorrect<deInt64>(*((deInt64*)resultValue), x, y, z, gridSize, extendedGridSize))
1488 if (!isValueCorrect<deInt32>(*((deInt32*)resultValue), x, y, z, gridSize, extendedGridSize))
1494 // 32-bit floating point
1495 if (!isValueCorrect<deUint32>(floatToUnsignedValue, x, y, z, gridSize, extendedGridSize))
1499 else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
1501 // Check if the end result equals one of the atomic args.
1502 bool matchFound = false;
1504 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
// Invocation i for this pixel lives at extended x-coordinate x + i*width.
1506 const IVec3 gid(x + i*gridSize.x(), y, z);
1507 matchFound = is64Bit ?
1508 (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1510 floatToUnsignedValue == getAtomicFuncArgument<deUint32>(m_operation, gid, extendedGridSize) :
1511 (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1518 else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
1520 // Check if the end result equals one of the atomic args.
1521 bool matchFound = false;
1523 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1525 const IVec3 gid(x + i*gridSize.x(), y, z);
1526 matchFound = is64Bit ?
1527 (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1529 floatToUnsignedValue == getAtomicFuncArgument<deUint32>(m_operation, gid, extendedGridSize) :
1530 (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
// Recomputes the expected end result for pixel (x,y,z) by folding the
// NUM_INVOCATIONS_PER_PIXEL atomic arguments into the operation's initial
// value (valid only for order-independent operations), then compares.
1542 template <typename T>
1543 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1545 T reference = getOperationInitialValue<T>(m_operation);
1546 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1548 const IVec3 gid(x + i*gridSize.x(), y, z);
1549 T arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1550 reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1552 return (resultValue == reference);
// Factory: instantiates the end-result test with this case's parameters.
1555 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
1557 return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation, m_useTransfer, m_readType, m_backingType);
// Test instance that captures the INTERMEDIATE values returned by each atomic
// invocation (stored in a widened side image) and verifies they form a valid
// serialization of the atomic operations.
1560 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
1564 BinaryAtomicIntermValuesInstance (Context& context,
1566 const ImageType imageType,
1567 const tcu::UVec3& imageSize,
1568 const TextureFormat& format,
1569 const AtomicOperation operation,
1570 const bool useTransfer,
1571 const ShaderReadType shaderReadType,
1572 const ImageBackingType backingType)
1573 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation, useTransfer, shaderReadType, backingType) {}
// Output buffer holds NUM_INVOCATIONS_PER_PIXEL texels per pixel.
1575 virtual deUint32 getOutputBufferSize (void) const;
// Creates the widened intermediate-results image in addition to the base image.
1577 virtual void prepareResources (const bool useTransfer);
1578 virtual void prepareDescriptors (const bool isTexelBuffer);
// Transitions the intermediate image to GENERAL before the atomic dispatch.
1580 virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const;
1581 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1582 const VkPipeline pipeline,
1583 const VkPipelineLayout pipelineLayout,
1584 const VkDescriptorSet descriptorSet,
1585 const VkDeviceSize& range,
1586 const bool useTransfer);
1588 virtual bool verifyResult (Allocation& outputBufferAllocation,
1589 const bool is64Bit) const;
// Checks one pixel: gathers its per-invocation return values and atomic
// arguments and asks verifyRecursive whether a valid ordering exists.
1593 template <typename T>
1594 bool areValuesCorrect (tcu::ConstPixelBufferAccess& resultBuffer,
1595 const bool isFloatingPoint,
1599 const UVec3& gridSize,
1600 const IVec3 extendedGridSize) const;
// Backtracking search for an invocation order consistent with the observed
// intermediate values.
1602 template <typename T>
1603 bool verifyRecursive (const deInt32 index,
1605 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1606 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1607 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
// Widened image holding each invocation's atomic return value.
1608 de::MovePtr<Image> m_intermResultsImage;
1609 Move<VkImageView> m_intermResultsImageView;
// NUM_INVOCATIONS_PER_PIXEL intermediate values are stored per pixel.
1612 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
1614 return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
// Creates the intermediate-results image, widened by NUM_INVOCATIONS_PER_PIXEL
// in X (and also in Y for cube-based images, which need square layers).
1617 void BinaryAtomicIntermValuesInstance::prepareResources (const bool useTransfer)
1619 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1620 const bool isCubeBasedImage = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
1621 const UVec3 extendedLayerSize = isCubeBasedImage ? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
1622 : UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z())
1624 createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage, m_intermResultsImageView);
// Builds the two-binding descriptor set for the atomic shader:
// binding 0 = result image/buffer, binding 1 = intermediate-results image/buffer.
1627 void BinaryAtomicIntermValuesInstance::prepareDescriptors (const bool isTexelBuffer)
1629 const VkDescriptorType descriptorType = isTexelBuffer ?
1630 VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1631 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1633 const VkDevice device = m_context.getDevice();
1634 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1636 m_descriptorSetLayout =
1637 DescriptorSetLayoutBuilder()
1638 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1639 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1640 .build(deviceInterface, device);
1643 DescriptorPoolBuilder()
1644 .addType(descriptorType, 2u)
1645 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1647 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
// Texel-buffer path: both result and intermediate data live in buffers
// accessed through texel-buffer views.
1651 m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1652 m_descIntermResultsBufferView = makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1654 DescriptorSetUpdateBuilder()
1655 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1656 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &(m_descIntermResultsBufferView.get()))
1657 .update(deviceInterface, device);
// Image path: bind both storage images in GENERAL layout.
1661 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1662 const VkDescriptorImageInfo descIntermResultsImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1664 DescriptorSetUpdateBuilder()
1665 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1666 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &descIntermResultsImageInfo)
1667 .update(deviceInterface, device);
// Transitions the intermediate-results image UNDEFINED -> GENERAL so the
// atomic shader can write the per-invocation return values into it.
1671 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
1673 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1674 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1676 const VkImageMemoryBarrier imagePreDispatchBarrier =
1677 makeImageMemoryBarrier( 0u,
1678 VK_ACCESS_SHADER_WRITE_BIT,
1679 VK_IMAGE_LAYOUT_UNDEFINED,
1680 VK_IMAGE_LAYOUT_GENERAL,
1681 m_intermResultsImage->get(),
1684 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
// Records readback of the intermediate-results image into m_outputBuffer:
// transfer copy when useTransfer, otherwise the read compute pipeline.
// Buffer-backed images wrote intermediates straight into the output buffer.
1687 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1688 const VkPipeline pipeline,
1689 const VkPipelineLayout pipelineLayout,
1690 const VkDescriptorSet descriptorSet,
1691 const VkDeviceSize& range,
1692 const bool useTransfer)
1694 // nothing is needed for texel image buffer
1695 if (m_imageType == IMAGE_TYPE_BUFFER)
1698 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1699 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1700 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
// Transfer path: transition for transfer-read and copy the widened image.
1704 const VkImageMemoryBarrier imagePostDispatchBarrier =
1705 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1706 VK_ACCESS_TRANSFER_READ_BIT,
1707 VK_IMAGE_LAYOUT_GENERAL,
1708 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1709 m_intermResultsImage->get(),
1712 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
1714 const UVec3 extendedLayerSize = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1715 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
1717 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
// Shader-readback path: bind the intermediate image + output buffer.
1721 const VkDevice device = m_context.getDevice();
1722 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1723 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1725 DescriptorSetUpdateBuilder()
1726 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1727 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1728 .update(deviceInterface, device);
// Atomic writes must be visible to the read shader before dispatch.
1730 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1731 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1732 VK_ACCESS_SHADER_READ_BIT,
1733 VK_IMAGE_LAYOUT_GENERAL,
1734 VK_IMAGE_LAYOUT_GENERAL,
1735 m_intermResultsImage->get(),
1738 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1739 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1740 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1741 DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1742 1u, &resultImagePostDispatchBarrier);
1744 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1745 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
// Same layer-to-dimension mapping as the end-result variant, with the
// X dimension widened by NUM_INVOCATIONS_PER_PIXEL.
1747 switch (m_imageType)
1749 case IMAGE_TYPE_1D_ARRAY:
1750 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), subresourceRange.layerCount, layerSize.z());
1752 case IMAGE_TYPE_2D_ARRAY:
1753 case IMAGE_TYPE_CUBE:
1754 case IMAGE_TYPE_CUBE_ARRAY:
1755 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1758 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
// Host-side verification of the intermediate values: for each original pixel
// (x iterates only the unextended width), dispatch to the typed checker
// matching the image format. Floats are validated via their unsigned bits path.
1764 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation& outputBufferAllocation,
1765 const bool is64Bit) const
1767 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1768 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
// The result buffer is widened in X to hold all per-invocation values.
1770 tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
1772 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1773 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1774 for (deUint32 x = 0; x < gridSize.x(); x++)
1776 if (isUintFormat(mapTextureFormat(m_format)))
1780 if (!areValuesCorrect<deUint64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1785 if (!areValuesCorrect<deUint32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1789 else if (isIntFormat(mapTextureFormat(m_format)))
1793 if (!areValuesCorrect<deInt64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1798 if (!areValuesCorrect<deInt32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1804 // 32-bit floating point
1805 if (!areValuesCorrect<deUint32>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
// \brief Check one pixel's set of intermediate atomic return values.
//
// For pixel (x, y, z), collects the NUM_INVOCATIONS_PER_PIXEL return values the
// shader stored at x + i*gridSize.x() along the extended X axis, together with
// the argument each invocation passed to the atomic function, and verifies (via
// verifyRecursive) that some ordering of those atomics reproduces the observed
// return values starting from the operation's initial value.
//
// isFloatingPoint: when true, each stored 32-bit pattern is reinterpreted as
// float before being converted back into T for the comparisons.
1813 template <typename T>
1814 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess& resultBuffer, const bool isFloatingPoint, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
// Per-invocation data for this pixel: observed return value, the argument the
// invocation used, and a "consumed" flag for the backtracking search.
1816 T resultValues[NUM_INVOCATIONS_PER_PIXEL];
1817 T atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1818 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1820 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
// The i-th invocation for this pixel wrote to x + i*gridSize.x() in the extended grid.
1822 IVec3 gid(x + i*gridSize.x(), y, z);
1823 T data = *((T*)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
1824 if (isFloatingPoint)
// Reinterpret the raw bit pattern as float, then convert its numeric
// value back into T so the equality checks below compare float values.
1827 deMemcpy(&fData, &data, sizeof(fData));
1828 data = static_cast<T>(fData);
1830 resultValues[i] = data;
// Reconstruct the argument this invocation is known to have passed to the atomic op.
1831 atomicArgs[i] = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1832 argsUsed[i] = false;
1835 // Verify that the return values form a valid sequence.
1836 return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
// \brief Backtracking check that the observed return values admit a valid ordering.
//
// At recursion depth 'index', valueSoFar is the image value produced by the
// atomics placed so far. Any unused invocation whose recorded return value
// equals valueSoFar could have executed next: each such candidate is tried,
// its argument folded into the running value with
// computeBinaryAtomicOperationResult(), and the search recurses. The sequence
// is valid once all NUM_INVOCATIONS_PER_PIXEL invocations have been placed.
1839 template <typename T>
1840 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32 index,
1842 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1843 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1844 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
// Base case: every invocation has been accounted for — the trace is valid.
1846 if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
1849 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
// Candidate: invocation i's observed return value matches the current
// image value, so it could have been the next atomic to execute.
1851 if (!argsUsed[i] && resultValues[i] == valueSoFar)
1855 if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
// Backtrack: this placement did not lead to a complete valid sequence.
1860 argsUsed[i] = false;
// Creates the instance that validates the per-invocation intermediate return
// values of the atomic operations (as opposed to the end-result variant).
1867 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
1869 return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation, m_useTransfer, m_readType, m_backingType);
// \brief Build the "atomic_operations" test group.
//
// Produces the case hierarchy:
//   operation / image type / [no]transfer / read type / backing type / format,
// adding one "<format>_end_result" and one "<format>_intermediate_values" case
// at each leaf (minus the combinations skipped below).
1874 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
1876 de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
// Pairs an image type with the image size used when testing that type.
1880 ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
1881 : m_imageType (imageType)
1882 , m_imageSize (imageSize)
1885 const ImageType m_imageType;
1886 const tcu::UVec3 m_imageSize;
1889 const ImageParams imageParamsArray[] =
1891 ImageParams(IMAGE_TYPE_1D, tcu::UVec3(64u, 1u, 1u)),
1892 ImageParams(IMAGE_TYPE_1D_ARRAY, tcu::UVec3(64u, 1u, 8u)),
1893 ImageParams(IMAGE_TYPE_2D, tcu::UVec3(64u, 64u, 1u)),
1894 ImageParams(IMAGE_TYPE_2D_ARRAY, tcu::UVec3(64u, 64u, 8u)),
1895 ImageParams(IMAGE_TYPE_3D, tcu::UVec3(48u, 48u, 8u)),
1896 ImageParams(IMAGE_TYPE_CUBE, tcu::UVec3(64u, 64u, 1u)),
1897 ImageParams(IMAGE_TYPE_CUBE_ARRAY, tcu::UVec3(64u, 64u, 2u)),
1898 ImageParams(IMAGE_TYPE_BUFFER, tcu::UVec3(64u, 1u, 1u))
// Texel formats under test: 32/64-bit signed/unsigned integers and 32-bit float.
1901 const tcu::TextureFormat formats[] =
1903 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1904 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1905 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1906 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1907 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)
// Shader read variants (normal vs. sparse reads) and their group names.
1912 ShaderReadType type;
1916 { ShaderReadType::NORMAL, "normal_read" },
1917 { ShaderReadType::SPARSE, "sparse_read" },
// Image backing variants (normal vs. sparse-resident images) and their group names.
1922 ImageBackingType type;
1926 { ImageBackingType::NORMAL, "normal_img" },
1927 { ImageBackingType::SPARSE, "sparse_img" },
// One top-level subgroup per atomic operation.
1930 for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
1932 const AtomicOperation operation = (AtomicOperation)operationI;
1934 de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
1936 for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
1938 const ImageType imageType = imageParamsArray[imageTypeNdx].m_imageType;
1939 const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
1941 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
// "transfer" vs. "notransfer": whether results are copied back with transfer
// operations or read in a shader (cf. the ShaderReadType skip below).
1943 for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
1945 const bool useTransfer = (useTransferIdx > 0);
1946 const string groupName = (!useTransfer ? "no" : "") + string("transfer");
1948 de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str(), ""));
1950 for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
1952 const auto& readType = readTypes[readTypeIdx];
1954 de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name, ""));
1956 for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
1958 const auto& backingType = backingTypes[backingTypeIdx];
1960 de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(new tcu::TestCaseGroup(testCtx, backingType.name, ""));
1962 for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
1964 const TextureFormat& format = formats[formatNdx];
1965 const std::string formatName = getShaderImageFormatQualifier(format);
1967 // Need SPIRV programs in vktImageAtomicSpirvShaders.cpp
1968 if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
1973 // Only 2D and 3D images may support sparse residency.
1974 const auto vkImageType = mapImageType(imageType);
1975 if (backingType.type == ImageBackingType::SPARSE && (vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D))
1978 // Only ADD and EXCHANGE are supported on floating-point
1979 if (format.type == tcu::TextureFormat::FLOAT)
1981 if (operation != ATOMIC_OPERATION_ADD && operation != ATOMIC_OPERATION_EXCHANGE)
1987 if (readType.type == ShaderReadType::SPARSE)
1989 // When using transfer, shader reads will not be used, so avoid creating two identical cases.
1993 // Sparse reads are not supported for all types of images.
1994 if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY || imageType == IMAGE_TYPE_BUFFER)
1998 //!< Atomic case checks the end result of the operations, and not the intermediate return values
1999 const string caseEndResult = formatName + "_end_result";
2000 backingTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2002 //!< Atomic case checks the return values of the atomic function and not the end result.
2003 const string caseIntermValues = formatName + "_intermediate_values";
2004 backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2007 readTypeGroup->addChild(backingTypeGroup.release());
2010 transferGroup->addChild(readTypeGroup.release());
2013 imageTypeGroup->addChild(transferGroup.release());
2016 operationGroup->addChild(imageTypeGroup.release());
2019 imageAtomicOperationsTests->addChild(operationGroup.release());
2022 return imageAtomicOperationsTests.release();