1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2016 The Khronos Group Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 * \file vktImageAtomicOperationTests.cpp
21 * \brief Image atomic operation tests
22 *//*--------------------------------------------------------------------*/
24 #include "vktImageAtomicOperationTests.hpp"
26 #include "deUniquePtr.hpp"
27 #include "deStringUtil.hpp"
29 #include "vktTestCaseUtil.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkImageUtil.hpp"
32 #include "vktImageTestsUtil.hpp"
33 #include "vkBuilderUtil.hpp"
35 #include "vkRefUtil.hpp"
36 #include "vkTypeUtil.hpp"
38 #include "tcuTextureUtil.hpp"
39 #include "tcuTexture.hpp"
40 #include "tcuVectorType.hpp"
53 using tcu::TextureFormat;
64 using tcu::Texture2DArray;
65 using tcu::TextureCube;
66 using tcu::PixelBufferAccess;
67 using tcu::ConstPixelBufferAccess;
69 using tcu::TestContext;
// Number of atomic-operation invocations performed per output pixel.
// The grid is extended NUM_INVOCATIONS_PER_PIXEL times along X so that
// several invocations hit the same destination texel (see getShaderGridSize
// usage below).
enum
{
	NUM_INVOCATIONS_PER_PIXEL = 5u
};
// Atomic image operation exercised by a test case. The MIN..XOR enumerators
// are referenced by the switch statements below (getAtomicOperationCaseName,
// getAtomicFuncArgument, ...), so the full set is declared here.
enum AtomicOperation
{
	ATOMIC_OPERATION_ADD = 0,
	ATOMIC_OPERATION_MIN,
	ATOMIC_OPERATION_MAX,
	ATOMIC_OPERATION_AND,
	ATOMIC_OPERATION_OR,
	ATOMIC_OPERATION_XOR,
	ATOMIC_OPERATION_EXCHANGE,
	ATOMIC_OPERATION_COMPARE_EXCHANGE,

	ATOMIC_OPERATION_LAST
};
90 static string getCoordStr (const ImageType imageType,
98 case IMAGE_TYPE_BUFFER:
100 case IMAGE_TYPE_1D_ARRAY:
102 return string("ivec2(" + x + "," + y + ")");
103 case IMAGE_TYPE_2D_ARRAY:
105 case IMAGE_TYPE_CUBE:
106 case IMAGE_TYPE_CUBE_ARRAY:
107 return string("ivec3(" + x + "," + y + "," + z + ")");
114 static string getAtomicFuncArgumentShaderStr (const AtomicOperation op,
118 const IVec3& gridSize)
122 case ATOMIC_OPERATION_ADD:
123 case ATOMIC_OPERATION_MIN:
124 case ATOMIC_OPERATION_MAX:
125 case ATOMIC_OPERATION_AND:
126 case ATOMIC_OPERATION_OR:
127 case ATOMIC_OPERATION_XOR:
128 return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
129 case ATOMIC_OPERATION_EXCHANGE:
130 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
131 return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
138 static string getAtomicOperationCaseName (const AtomicOperation op)
142 case ATOMIC_OPERATION_ADD: return string("add");
143 case ATOMIC_OPERATION_MIN: return string("min");
144 case ATOMIC_OPERATION_MAX: return string("max");
145 case ATOMIC_OPERATION_AND: return string("and");
146 case ATOMIC_OPERATION_OR: return string("or");
147 case ATOMIC_OPERATION_XOR: return string("xor");
148 case ATOMIC_OPERATION_EXCHANGE: return string("exchange");
149 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("compare_exchange");
156 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
160 case ATOMIC_OPERATION_ADD: return string("imageAtomicAdd");
161 case ATOMIC_OPERATION_MIN: return string("imageAtomicMin");
162 case ATOMIC_OPERATION_MAX: return string("imageAtomicMax");
163 case ATOMIC_OPERATION_AND: return string("imageAtomicAnd");
164 case ATOMIC_OPERATION_OR: return string("imageAtomicOr");
165 case ATOMIC_OPERATION_XOR: return string("imageAtomicXor");
166 case ATOMIC_OPERATION_EXCHANGE: return string("imageAtomicExchange");
167 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("imageAtomicCompSwap");
174 static deInt32 getOperationInitialValue (const AtomicOperation op)
178 // \note 18 is just an arbitrary small nonzero value.
179 case ATOMIC_OPERATION_ADD: return 18;
180 case ATOMIC_OPERATION_MIN: return (1 << 15) - 1;
181 case ATOMIC_OPERATION_MAX: return 18;
182 case ATOMIC_OPERATION_AND: return (1 << 15) - 1;
183 case ATOMIC_OPERATION_OR: return 18;
184 case ATOMIC_OPERATION_XOR: return 18;
185 case ATOMIC_OPERATION_EXCHANGE: return 18;
186 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 18;
193 static deInt32 getAtomicFuncArgument (const AtomicOperation op, const IVec3& invocationID, const IVec3& gridSize)
195 const int x = invocationID.x();
196 const int y = invocationID.y();
197 const int z = invocationID.z();
201 // \note Fall-throughs.
202 case ATOMIC_OPERATION_ADD:
203 case ATOMIC_OPERATION_MIN:
204 case ATOMIC_OPERATION_MAX:
205 case ATOMIC_OPERATION_AND:
206 case ATOMIC_OPERATION_OR:
207 case ATOMIC_OPERATION_XOR:
208 return x*x + y*y + z*z;
209 case ATOMIC_OPERATION_EXCHANGE:
210 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
211 return (z*gridSize.x() + x)*gridSize.y() + y;
218 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
219 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
221 return op == ATOMIC_OPERATION_ADD ||
222 op == ATOMIC_OPERATION_MIN ||
223 op == ATOMIC_OPERATION_MAX ||
224 op == ATOMIC_OPERATION_AND ||
225 op == ATOMIC_OPERATION_OR ||
226 op == ATOMIC_OPERATION_XOR;
229 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
230 static deInt32 computeBinaryAtomicOperationResult (const AtomicOperation op, const deInt32 a, const deInt32 b)
234 case ATOMIC_OPERATION_ADD: return a + b;
235 case ATOMIC_OPERATION_MIN: return de::min(a, b);
236 case ATOMIC_OPERATION_MAX: return de::max(a, b);
237 case ATOMIC_OPERATION_AND: return a & b;
238 case ATOMIC_OPERATION_OR: return a | b;
239 case ATOMIC_OPERATION_XOR: return a ^ b;
240 case ATOMIC_OPERATION_EXCHANGE: return b;
241 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return (a == 18) ? b : a;
248 class BinaryAtomicEndResultCase : public vkt::TestCase
251 BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
253 const string& description,
254 const ImageType imageType,
255 const tcu::UVec3& imageSize,
256 const tcu::TextureFormat& format,
257 const AtomicOperation operation,
258 const glu::GLSLVersion glslVersion);
260 void initPrograms (SourceCollections& sourceCollections) const;
261 TestInstance* createInstance (Context& context) const;
264 const ImageType m_imageType;
265 const tcu::UVec3 m_imageSize;
266 const tcu::TextureFormat m_format;
267 const AtomicOperation m_operation;
268 const glu::GLSLVersion m_glslVersion;
271 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
273 const string& description,
274 const ImageType imageType,
275 const tcu::UVec3& imageSize,
276 const tcu::TextureFormat& format,
277 const AtomicOperation operation,
278 const glu::GLSLVersion glslVersion)
279 : TestCase (testCtx, name, description)
280 , m_imageType (imageType)
281 , m_imageSize (imageSize)
283 , m_operation (operation)
284 , m_glslVersion (glslVersion)
288 void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
290 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
292 const bool uintFormat = isUintFormat(mapTextureFormat(m_format));
293 const bool intFormat = isIntFormat(mapTextureFormat(m_format));
294 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
295 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
297 const string atomicArgExpr = (uintFormat ? "uint" : intFormat ? "int" : "float")
298 + getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
300 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ? ", 18" + string(uintFormat ? "u" : "") : "";
301 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
302 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
303 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
305 string source = versionDecl + "\n"
306 "precision highp " + shaderImageTypeStr + ";\n"
308 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
309 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
313 " int gx = int(gl_GlobalInvocationID.x);\n"
314 " int gy = int(gl_GlobalInvocationID.y);\n"
315 " int gz = int(gl_GlobalInvocationID.z);\n"
316 " " + atomicInvocation + ";\n"
319 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
322 class BinaryAtomicIntermValuesCase : public vkt::TestCase
325 BinaryAtomicIntermValuesCase (tcu::TestContext& testCtx,
327 const string& description,
328 const ImageType imageType,
329 const tcu::UVec3& imageSize,
330 const tcu::TextureFormat& format,
331 const AtomicOperation operation,
332 const glu::GLSLVersion glslVersion);
334 void initPrograms (SourceCollections& sourceCollections) const;
335 TestInstance* createInstance (Context& context) const;
338 const ImageType m_imageType;
339 const tcu::UVec3 m_imageSize;
340 const tcu::TextureFormat m_format;
341 const AtomicOperation m_operation;
342 const glu::GLSLVersion m_glslVersion;
345 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext& testCtx,
347 const string& description,
348 const ImageType imageType,
349 const tcu::UVec3& imageSize,
350 const TextureFormat& format,
351 const AtomicOperation operation,
352 const glu::GLSLVersion glslVersion)
353 : TestCase (testCtx, name, description)
354 , m_imageType (imageType)
355 , m_imageSize (imageSize)
357 , m_operation (operation)
358 , m_glslVersion (glslVersion)
362 void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
364 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
366 const bool uintFormat = isUintFormat(mapTextureFormat(m_format));
367 const bool intFormat = isIntFormat(mapTextureFormat(m_format));
368 const string colorVecTypeName = string(uintFormat ? "u" : intFormat ? "i" : "") + "vec4";
369 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
370 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
371 const string invocationCoord = getCoordStr(m_imageType, "gx", "gy", "gz");
372 const string atomicArgExpr = (uintFormat ? "uint" : intFormat ? "int" : "float")
373 + getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
375 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ? ", 18" + string(uintFormat ? "u" : "") : "";
376 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
377 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
378 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
380 string source = versionDecl + "\n"
381 "precision highp " + shaderImageTypeStr + ";\n"
383 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
384 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
385 "layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
389 " int gx = int(gl_GlobalInvocationID.x);\n"
390 " int gy = int(gl_GlobalInvocationID.y);\n"
391 " int gz = int(gl_GlobalInvocationID.z);\n"
392 " imageStore(u_intermValuesImage, " + invocationCoord + ", " + colorVecTypeName + "(" + atomicInvocation + "));\n"
395 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
398 class BinaryAtomicInstanceBase : public vkt::TestInstance
402 BinaryAtomicInstanceBase (Context& context,
404 const ImageType imageType,
405 const tcu::UVec3& imageSize,
406 const TextureFormat& format,
407 const AtomicOperation operation);
409 tcu::TestStatus iterate (void);
411 virtual deUint32 getOutputBufferSize (void) const = 0;
413 virtual void prepareResources (void) = 0;
414 virtual void prepareDescriptors (void) = 0;
416 virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const = 0;
417 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer) const = 0;
419 virtual bool verifyResult (Allocation& outputBufferAllocation) const = 0;
423 const ImageType m_imageType;
424 const tcu::UVec3 m_imageSize;
425 const TextureFormat m_format;
426 const AtomicOperation m_operation;
428 de::MovePtr<Buffer> m_outputBuffer;
429 Move<VkDescriptorPool> m_descriptorPool;
430 Move<VkDescriptorSetLayout> m_descriptorSetLayout;
431 Move<VkDescriptorSet> m_descriptorSet;
432 de::MovePtr<Image> m_resultImage;
433 Move<VkImageView> m_resultImageView;
436 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context& context,
438 const ImageType imageType,
439 const tcu::UVec3& imageSize,
440 const TextureFormat& format,
441 const AtomicOperation operation)
442 : vkt::TestInstance (context)
444 , m_imageType (imageType)
445 , m_imageSize (imageSize)
447 , m_operation (operation)
451 tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
453 const VkDevice device = m_context.getDevice();
454 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
455 const VkQueue queue = m_context.getUniversalQueue();
456 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
457 Allocator& allocator = m_context.getDefaultAllocator();
458 const VkDeviceSize imageSizeInBytes = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
459 const VkDeviceSize outBuffSizeInBytes = getOutputBufferSize();
461 const VkImageCreateInfo imageParams =
463 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
464 DE_NULL, // const void* pNext;
465 (m_imageType == IMAGE_TYPE_CUBE ||
466 m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
467 (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
468 (VkImageCreateFlags)0u), // VkImageCreateFlags flags;
469 mapImageType(m_imageType), // VkImageType imageType;
470 mapTextureFormat(m_format), // VkFormat format;
471 makeExtent3D(getLayerSize(m_imageType, m_imageSize)), // VkExtent3D extent;
472 1u, // deUint32 mipLevels;
473 getNumLayers(m_imageType, m_imageSize), // deUint32 arrayLayers;
474 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
475 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
476 VK_IMAGE_USAGE_STORAGE_BIT |
477 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
478 VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
479 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
480 0u, // deUint32 queueFamilyIndexCount;
481 DE_NULL, // const deUint32* pQueueFamilyIndices;
482 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
485 //Create the image that is going to store results of atomic operations
486 m_resultImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));
488 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
490 m_resultImageView = makeImageView(deviceInterface, device, m_resultImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
492 //Prepare the buffer with the initial data for the image
493 const Buffer inputBuffer(deviceInterface, device, allocator, makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
495 Allocation& inputBufferAllocation = inputBuffer.getAllocation();
497 //Prepare the initial data for the image
498 const tcu::IVec4 initialValue(getOperationInitialValue(m_operation));
500 tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
501 tcu::PixelBufferAccess inputPixelBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), inputBufferAllocation.getHostPtr());
503 for (deUint32 z = 0; z < gridSize.z(); z++)
504 for (deUint32 y = 0; y < gridSize.y(); y++)
505 for (deUint32 x = 0; x < gridSize.x(); x++)
507 inputPixelBuffer.setPixel(initialValue, x, y, z);
510 flushMappedMemoryRange(deviceInterface, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), imageSizeInBytes);
512 // Create a buffer to store shader output copied from result image
513 m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface, device, allocator, makeBufferCreateInfo(outBuffSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
517 prepareDescriptors();
520 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
521 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
522 const Unique<VkPipeline> pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
524 // Create command buffer
525 const Unique<VkCommandPool> cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
526 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
528 beginCommandBuffer(deviceInterface, *cmdBuffer);
530 deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
531 deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
533 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier =
534 makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT,
535 VK_ACCESS_TRANSFER_READ_BIT,
540 const VkImageMemoryBarrier resultImagePreCopyBarrier =
541 makeImageMemoryBarrier( 0u,
542 VK_ACCESS_TRANSFER_WRITE_BIT,
543 VK_IMAGE_LAYOUT_UNDEFINED,
544 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
545 m_resultImage->get(),
548 deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 1u, &inputBufferPostHostWriteBarrier, 1u, &resultImagePreCopyBarrier);
550 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));
552 deviceInterface.cmdCopyBufferToImage(*cmdBuffer, *inputBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &bufferImageCopyParams);
554 const VkImageMemoryBarrier resultImagePostCopyBarrier =
555 makeImageMemoryBarrier( VK_ACCESS_TRANSFER_WRITE_BIT,
556 VK_ACCESS_SHADER_READ_BIT,
557 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
558 VK_IMAGE_LAYOUT_GENERAL,
559 m_resultImage->get(),
562 deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostCopyBarrier);
564 commandsBeforeCompute(*cmdBuffer);
566 deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
568 commandsAfterCompute(*cmdBuffer);
570 const VkBufferMemoryBarrier outputBufferPreHostReadBarrier
571 = makeBufferMemoryBarrier( VK_ACCESS_TRANSFER_WRITE_BIT,
572 VK_ACCESS_HOST_READ_BIT,
573 m_outputBuffer->get(),
577 deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, 0u, DE_NULL, 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
579 endCommandBuffer(deviceInterface, *cmdBuffer);
581 submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer);
583 Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();
585 invalidateMappedMemoryRange(deviceInterface, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outBuffSizeInBytes);
587 if (verifyResult(outputBufferAllocation))
588 return tcu::TestStatus::pass("Comparison succeeded");
590 return tcu::TestStatus::fail("Comparison failed");
593 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
597 BinaryAtomicEndResultInstance (Context& context,
599 const ImageType imageType,
600 const tcu::UVec3& imageSize,
601 const TextureFormat& format,
602 const AtomicOperation operation)
603 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}
605 virtual deUint32 getOutputBufferSize (void) const;
607 virtual void prepareResources (void) {}
608 virtual void prepareDescriptors (void);
610 virtual void commandsBeforeCompute (const VkCommandBuffer) const {}
611 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer) const;
613 virtual bool verifyResult (Allocation& outputBufferAllocation) const;
616 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
618 return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
621 void BinaryAtomicEndResultInstance::prepareDescriptors (void)
623 const VkDevice device = m_context.getDevice();
624 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
626 m_descriptorSetLayout =
627 DescriptorSetLayoutBuilder()
628 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
629 .build(deviceInterface, device);
632 DescriptorPoolBuilder()
633 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
634 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
636 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
638 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
640 DescriptorSetUpdateBuilder()
641 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
642 .update(deviceInterface, device);
645 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
647 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
648 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
650 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
651 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
652 VK_ACCESS_TRANSFER_READ_BIT,
653 VK_IMAGE_LAYOUT_GENERAL,
654 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
655 m_resultImage->get(),
658 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostDispatchBarrier);
660 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));
662 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
665 bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation) const
667 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
668 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
670 tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
672 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
673 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
674 for (deInt32 x = 0; x < resultBuffer.getWidth(); x++)
676 deInt32 resultValue = resultBuffer.getPixelInt(x, y, z).x();
678 if (isOrderIndependentAtomicOperation(m_operation))
680 deInt32 reference = getOperationInitialValue(m_operation);
682 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
684 const IVec3 gid(x + i*gridSize.x(), y, z);
685 reference = computeBinaryAtomicOperationResult(m_operation, reference, getAtomicFuncArgument(m_operation, gid, extendedGridSize));
688 if (resultValue != reference)
691 else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
693 // Check if the end result equals one of the atomic args.
694 bool matchFound = false;
696 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
698 const IVec3 gid(x + i*gridSize.x(), y, z);
699 matchFound = (resultValue == getAtomicFuncArgument(m_operation, gid, extendedGridSize));
705 else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
707 // Check if the end result equals one of the atomic args.
708 bool matchFound = false;
710 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
712 const IVec3 gid(x + i*gridSize.x(), y, z);
713 matchFound = (resultValue == getAtomicFuncArgument(m_operation, gid, extendedGridSize));
725 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
727 return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
730 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
734 BinaryAtomicIntermValuesInstance (Context& context,
736 const ImageType imageType,
737 const tcu::UVec3& imageSize,
738 const TextureFormat& format,
739 const AtomicOperation operation)
740 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}
742 virtual deUint32 getOutputBufferSize (void) const;
744 virtual void prepareResources (void);
745 virtual void prepareDescriptors (void);
747 virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const;
748 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer) const;
750 virtual bool verifyResult (Allocation& outputBufferAllocation) const;
754 bool verifyRecursive (const deInt32 index,
755 const deInt32 valueSoFar,
756 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
757 const deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
758 const deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
759 de::MovePtr<Image> m_intermResultsImage;
760 Move<VkImageView> m_intermResultsImageView;
763 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
765 return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
768 void BinaryAtomicIntermValuesInstance::prepareResources (void)
770 const VkDevice device = m_context.getDevice();
771 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
772 Allocator& allocator = m_context.getDefaultAllocator();
774 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
775 const bool isCubeBasedImage = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
776 const UVec3 extendedLayerSize = isCubeBasedImage ? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
777 : UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
779 const VkImageCreateInfo imageParams =
781 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
782 DE_NULL, // const void* pNext;
783 (m_imageType == IMAGE_TYPE_CUBE ||
784 m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
785 (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
786 (VkImageCreateFlags)0u), // VkImageCreateFlags flags;
787 mapImageType(m_imageType), // VkImageType imageType;
788 mapTextureFormat(m_format), // VkFormat format;
789 makeExtent3D(extendedLayerSize), // VkExtent3D extent;
790 1u, // deUint32 mipLevels;
791 getNumLayers(m_imageType, m_imageSize), // deUint32 arrayLayers;
792 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
793 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
794 VK_IMAGE_USAGE_STORAGE_BIT |
795 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
796 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
797 0u, // deUint32 queueFamilyIndexCount;
798 DE_NULL, // const deUint32* pQueueFamilyIndices;
799 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
802 m_intermResultsImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));
804 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
806 m_intermResultsImageView = makeImageView(deviceInterface, device, m_intermResultsImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
809 void BinaryAtomicIntermValuesInstance::prepareDescriptors (void)
811 const VkDevice device = m_context.getDevice();
812 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
814 m_descriptorSetLayout =
815 DescriptorSetLayoutBuilder()
816 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
817 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
818 .build(deviceInterface, device);
821 DescriptorPoolBuilder()
822 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u)
823 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
825 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
827 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
828 const VkDescriptorImageInfo descIntermResultsImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
830 DescriptorSetUpdateBuilder()
831 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
832 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descIntermResultsImageInfo)
833 .update(deviceInterface, device);
836 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
838 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
839 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
841 const VkImageMemoryBarrier imagePreDispatchBarrier =
842 makeImageMemoryBarrier( 0u,
843 VK_ACCESS_SHADER_WRITE_BIT,
844 VK_IMAGE_LAYOUT_UNDEFINED,
845 VK_IMAGE_LAYOUT_GENERAL,
846 m_intermResultsImage->get(),
849 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
852 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
854 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
855 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
857 const VkImageMemoryBarrier imagePostDispatchBarrier =
858 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
859 VK_ACCESS_TRANSFER_READ_BIT,
860 VK_IMAGE_LAYOUT_GENERAL,
861 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
862 m_intermResultsImage->get(),
865 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
867 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
868 const UVec3 extendedLayerSize = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
869 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
871 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
874 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation& outputBufferAllocation) const
876 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
877 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
879 tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
881 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
882 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
883 for (deUint32 x = 0; x < gridSize.x(); x++)
885 deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL];
886 deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
887 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL];
889 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
891 IVec3 gid(x + i*gridSize.x(), y, z);
893 resultValues[i] = resultBuffer.getPixelInt(gid.x(), gid.y(), gid.z()).x();
894 atomicArgs[i] = getAtomicFuncArgument(m_operation, gid, extendedGridSize);
898 // Verify that the return values form a valid sequence.
899 if (!verifyRecursive(0, getOperationInitialValue(m_operation), argsUsed, atomicArgs, resultValues))
908 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32 index,
909 const deInt32 valueSoFar,
910 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
911 const deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
912 const deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
914 if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
917 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
919 if (!argsUsed[i] && resultValues[i] == valueSoFar)
923 if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
935 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
937 return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
942 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
944 de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
948 ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
949 : m_imageType (imageType)
950 , m_imageSize (imageSize)
953 const ImageType m_imageType;
954 const tcu::UVec3 m_imageSize;
957 static const ImageParams imageParamsArray[] =
959 ImageParams(IMAGE_TYPE_1D, tcu::UVec3(64u, 1u, 1u)),
960 ImageParams(IMAGE_TYPE_1D_ARRAY, tcu::UVec3(64u, 1u, 8u)),
961 ImageParams(IMAGE_TYPE_2D, tcu::UVec3(64u, 64u, 1u)),
962 ImageParams(IMAGE_TYPE_2D_ARRAY, tcu::UVec3(64u, 64u, 8u)),
963 ImageParams(IMAGE_TYPE_3D, tcu::UVec3(64u, 64u, 8u)),
964 ImageParams(IMAGE_TYPE_CUBE, tcu::UVec3(64u, 64u, 1u)),
965 ImageParams(IMAGE_TYPE_CUBE_ARRAY, tcu::UVec3(64u, 64u, 2u))
968 static const tcu::TextureFormat formats[] =
970 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
971 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32)
974 for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
976 const AtomicOperation operation = (AtomicOperation)operationI;
978 de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
980 for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
982 const ImageType imageType = imageParamsArray[imageTypeNdx].m_imageType;
983 const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
985 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
987 for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
989 const TextureFormat& format = formats[formatNdx];
990 const std::string formatName = getShaderImageFormatQualifier(format);
992 //!< Atomic case checks the end result of the operations, and not the intermediate return values
993 const string caseEndResult = formatName + "_end_result";
994 imageTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
996 //!< Atomic case checks the return values of the atomic function and not the end result.
997 const string caseIntermValues = formatName + "_intermediate_values";
998 imageTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
1001 operationGroup->addChild(imageTypeGroup.release());
1004 imageAtomicOperationsTests->addChild(operationGroup.release());
1007 return imageAtomicOperationsTests.release();