/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktImageAtomicOperationTests.cpp
 * \brief Image atomic operation tests
 *//*--------------------------------------------------------------------*/
#include "vktImageAtomicOperationTests.hpp"

#include "deUniquePtr.hpp"
#include "deStringUtil.hpp"

#include "vktTestCaseUtil.hpp"
#include "vkPrograms.hpp"
#include "vkImageUtil.hpp"
#include "vktImageTestsUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkRefUtil.hpp"
#include "vkTypeUtil.hpp"

#include "tcuTextureUtil.hpp"
#include "tcuTexture.hpp"
#include "tcuVectorType.hpp"
namespace vkt
{
namespace image
{
namespace
{

using namespace vk;

using de::toString;

using std::string;

using tcu::TextureFormat;
using tcu::UVec3;
using tcu::IVec3;
using tcu::IVec4;
using tcu::Texture2DArray;
using tcu::TextureCube;
using tcu::PixelBufferAccess;
using tcu::ConstPixelBufferAccess;
using tcu::TestContext;
enum
{
	NUM_INVOCATIONS_PER_PIXEL = 5u
};
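// Each pixel of the result image is targeted by NUM_INVOCATIONS_PER_PIXEL shader
// invocations: the dispatch is widened NUM_INVOCATIONS_PER_PIXEL times along X and the
// shader wraps the X coordinate back onto the image with "gx % width".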
enum AtomicOperation
{
	ATOMIC_OPERATION_ADD = 0,
	ATOMIC_OPERATION_MIN,
	ATOMIC_OPERATION_MAX,
	ATOMIC_OPERATION_AND,
	ATOMIC_OPERATION_OR,
	ATOMIC_OPERATION_XOR,
	ATOMIC_OPERATION_EXCHANGE,

	ATOMIC_OPERATION_LAST
};
static string getCoordStr (const ImageType	imageType,
						   const string&	x,
						   const string&	y,
						   const string&	z)
{
	switch (imageType)
	{
		case IMAGE_TYPE_1D:
		case IMAGE_TYPE_BUFFER:
			return x;
		case IMAGE_TYPE_1D_ARRAY:
		case IMAGE_TYPE_2D:
			return string("ivec2(" + x + "," + y + ")");
		case IMAGE_TYPE_2D_ARRAY:
		case IMAGE_TYPE_3D:
		case IMAGE_TYPE_CUBE:
		case IMAGE_TYPE_CUBE_ARRAY:
			return string("ivec3(" + x + "," + y + "," + z + ")");
		default:
			DE_ASSERT(false);
			return DE_NULL;
	}
}
static string getAtomicFuncArgumentShaderStr (const AtomicOperation	op,
											  const string&			x,
											  const string&			y,
											  const string&			z,
											  const IVec3&			gridSize)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:
		case ATOMIC_OPERATION_MIN:
		case ATOMIC_OPERATION_MAX:
		case ATOMIC_OPERATION_AND:
		case ATOMIC_OPERATION_OR:
		case ATOMIC_OPERATION_XOR:
			return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
		case ATOMIC_OPERATION_EXCHANGE:
			return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
		default:
			DE_ASSERT(false);
			return DE_NULL;
	}
}
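// For the commutative operations the argument (x*x + y*y + z*z) need not be unique per
// invocation; for exchange it is the linearized invocation index, so every invocation
// writes a distinct value and the end result can be matched against exactly one argument.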
static string getAtomicOperationCaseName (const AtomicOperation op)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:			return string("add");
		case ATOMIC_OPERATION_MIN:			return string("min");
		case ATOMIC_OPERATION_MAX:			return string("max");
		case ATOMIC_OPERATION_AND:			return string("and");
		case ATOMIC_OPERATION_OR:			return string("or");
		case ATOMIC_OPERATION_XOR:			return string("xor");
		case ATOMIC_OPERATION_EXCHANGE:		return string("exchange");
		default:
			DE_ASSERT(false);
			return DE_NULL;
	}
}
static string getAtomicOperationShaderFuncName (const AtomicOperation op)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:			return string("imageAtomicAdd");
		case ATOMIC_OPERATION_MIN:			return string("imageAtomicMin");
		case ATOMIC_OPERATION_MAX:			return string("imageAtomicMax");
		case ATOMIC_OPERATION_AND:			return string("imageAtomicAnd");
		case ATOMIC_OPERATION_OR:			return string("imageAtomicOr");
		case ATOMIC_OPERATION_XOR:			return string("imageAtomicXor");
		case ATOMIC_OPERATION_EXCHANGE:		return string("imageAtomicExchange");
		default:
			DE_ASSERT(false);
			return DE_NULL;
	}
}
static deInt32 getOperationInitialValue (const AtomicOperation op)
{
	switch (op)
	{
		// \note 18 is just an arbitrary small nonzero value.
		case ATOMIC_OPERATION_ADD:			return 18;
		case ATOMIC_OPERATION_MIN:			return (1 << 15) - 1;
		case ATOMIC_OPERATION_MAX:			return 18;
		case ATOMIC_OPERATION_AND:			return (1 << 15) - 1;
		case ATOMIC_OPERATION_OR:			return 18;
		case ATOMIC_OPERATION_XOR:			return 18;
		case ATOMIC_OPERATION_EXCHANGE:		return 18;
		default:
			DE_ASSERT(false);
			return -1;
	}
}
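// MIN and AND start from (1 << 15) - 1, i.e. the low 15 bits all set: min can only pull
// the value downwards and AND can only clear bits, so a large all-ones starting point
// keeps every invocation's argument observable in the end result.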
static deInt32 getAtomicFuncArgument (const AtomicOperation op, const IVec3& invocationID, const IVec3& gridSize)
{
	const int x = invocationID.x();
	const int y = invocationID.y();
	const int z = invocationID.z();

	switch (op)
	{
		// \note Fall-throughs.
		case ATOMIC_OPERATION_ADD:
		case ATOMIC_OPERATION_MIN:
		case ATOMIC_OPERATION_MAX:
		case ATOMIC_OPERATION_AND:
		case ATOMIC_OPERATION_OR:
		case ATOMIC_OPERATION_XOR:
			return x*x + y*y + z*z;
		case ATOMIC_OPERATION_EXCHANGE:
			return (z*gridSize.x() + x)*gridSize.y() + y;
		default:
			DE_ASSERT(false);
			return -1;
	}
}
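// This function must stay in sync with getAtomicFuncArgumentShaderStr: the host-side
// verification recomputes exactly the arguments the shader passed to the atomic call.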
//! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried out (i.e. the operation is both commutative and associative).
static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
{
	return	op == ATOMIC_OPERATION_ADD	||
			op == ATOMIC_OPERATION_MIN	||
			op == ATOMIC_OPERATION_MAX	||
			op == ATOMIC_OPERATION_AND	||
			op == ATOMIC_OPERATION_OR	||
			op == ATOMIC_OPERATION_XOR;
}
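// Exchange is the one excluded operation: exchange(a) followed by exchange(b) leaves b
// in the image while the reverse order leaves a, so only a set of possible end results
// can be checked, not a single reference value.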
//! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
static deInt32 computeBinaryAtomicOperationResult (const AtomicOperation op, const deInt32 a, const deInt32 b)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:			return a + b;
		case ATOMIC_OPERATION_MIN:			return de::min(a, b);
		case ATOMIC_OPERATION_MAX:			return de::max(a, b);
		case ATOMIC_OPERATION_AND:			return a & b;
		case ATOMIC_OPERATION_OR:			return a | b;
		case ATOMIC_OPERATION_XOR:			return a ^ b;
		case ATOMIC_OPERATION_EXCHANGE:		return b;
		default:
			DE_ASSERT(false);
			return -1;
	}
}
class BinaryAtomicEndResultCase : public vkt::TestCase
{
public:
								BinaryAtomicEndResultCase	(tcu::TestContext&			testCtx,
															 const string&				name,
															 const string&				description,
															 const ImageType			imageType,
															 const tcu::UVec3&			imageSize,
															 const tcu::TextureFormat&	format,
															 const AtomicOperation		operation,
															 const glu::GLSLVersion		glslVersion);

	void						initPrograms				(SourceCollections&	sourceCollections) const;
	TestInstance*				createInstance				(Context&			context) const;

private:
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
	const AtomicOperation		m_operation;
	const glu::GLSLVersion		m_glslVersion;
};
BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext&				testCtx,
													  const string&					name,
													  const string&					description,
													  const ImageType				imageType,
													  const tcu::UVec3&				imageSize,
													  const tcu::TextureFormat&		format,
													  const AtomicOperation			operation,
													  const glu::GLSLVersion		glslVersion)
	: TestCase		(testCtx, name, description)
	, m_imageType	(imageType)
	, m_imageSize	(imageSize)
	, m_format		(format)
	, m_operation	(operation)
	, m_glslVersion	(glslVersion)
{
}
void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
{
	const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);

	const bool		uintFormat				= isUintFormat(mapTextureFormat(m_format));
	const bool		intFormat				= isIntFormat(mapTextureFormat(m_format));
	const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
	const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");

	const string	atomicArgExpr			= (uintFormat ? "uint" : intFormat ? "int" : "float")
											+ getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

	const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + ", " + atomicArgExpr + ")";
	const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
	const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);

	string source = versionDecl + "\n"
		"precision highp " + shaderImageTypeStr + ";\n"
		"\n"
		"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
		"\n"
		"void main (void)\n"
		"{\n"
		"	int gx = int(gl_GlobalInvocationID.x);\n"
		"	int gy = int(gl_GlobalInvocationID.y);\n"
		"	int gz = int(gl_GlobalInvocationID.z);\n"
		"	" + atomicInvocation + ";\n"
		"}\n";

	sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
}
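// For illustration: with ATOMIC_OPERATION_ADD, a 64x64 r32ui 2D image and GLSL 4.40, the
// generated compute source comes out roughly as
//
//	#version 440
//	precision highp uimage2D;
//
//	layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
//	layout (r32ui, binding=0) coherent uniform uimage2D u_resultImage;
//
//	void main (void)
//	{
//		int gx = int(gl_GlobalInvocationID.x);
//		int gy = int(gl_GlobalInvocationID.y);
//		int gz = int(gl_GlobalInvocationID.z);
//		imageAtomicAdd(u_resultImage, ivec2(gx % 64,gy), uint(gx*gx + gy*gy + gz*gz));
//	}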
class BinaryAtomicIntermValuesCase : public vkt::TestCase
{
public:
								BinaryAtomicIntermValuesCase	(tcu::TestContext&			testCtx,
																 const string&				name,
																 const string&				description,
																 const ImageType			imageType,
																 const tcu::UVec3&			imageSize,
																 const tcu::TextureFormat&	format,
																 const AtomicOperation		operation,
																 const glu::GLSLVersion		glslVersion);

	void						initPrograms					(SourceCollections&	sourceCollections) const;
	TestInstance*				createInstance					(Context&			context) const;

private:
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
	const AtomicOperation		m_operation;
	const glu::GLSLVersion		m_glslVersion;
};
BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext&			testCtx,
															const string&			name,
															const string&			description,
															const ImageType			imageType,
															const tcu::UVec3&		imageSize,
															const TextureFormat&	format,
															const AtomicOperation	operation,
															const glu::GLSLVersion	glslVersion)
	: TestCase		(testCtx, name, description)
	, m_imageType	(imageType)
	, m_imageSize	(imageSize)
	, m_format		(format)
	, m_operation	(operation)
	, m_glslVersion	(glslVersion)
{
}
void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
{
	const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);

	const bool		uintFormat				= isUintFormat(mapTextureFormat(m_format));
	const bool		intFormat				= isIntFormat(mapTextureFormat(m_format));
	const string	colorVecTypeName		= string(uintFormat ? "u" : intFormat ? "i" : "") + "vec4";
	const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
	const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
	const string	invocationCoord			= getCoordStr(m_imageType, "gx", "gy", "gz");
	const string	atomicArgExpr			= (uintFormat ? "uint" : intFormat ? "int" : "float")
											+ getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

	const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + ", " + atomicArgExpr + ")";
	const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
	const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);

	string source = versionDecl + "\n"
		"precision highp " + shaderImageTypeStr + ";\n"
		"\n"
		"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
		"layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
		"\n"
		"void main (void)\n"
		"{\n"
		"	int gx = int(gl_GlobalInvocationID.x);\n"
		"	int gy = int(gl_GlobalInvocationID.y);\n"
		"	int gz = int(gl_GlobalInvocationID.z);\n"
		"	imageStore(u_intermValuesImage, " + invocationCoord + ", " + colorVecTypeName + "(" + atomicInvocation + "));\n"
		"}\n";

	sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
}
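// Unlike the end-result shader, each invocation here also records the atomic call's
// return value (the texel contents just before its own operation) at its unextended
// coordinate in u_intermValuesImage, which is NUM_INVOCATIONS_PER_PIXEL times wider.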
class BinaryAtomicInstanceBase : public vkt::TestInstance
{
public:
								BinaryAtomicInstanceBase (Context&				context,
														  const string&			name,
														  const ImageType		imageType,
														  const tcu::UVec3&		imageSize,
														  const TextureFormat&	format,
														  const AtomicOperation	operation);

	tcu::TestStatus				iterate					(void);

	virtual deUint32			getOutputBufferSize		(void) const = 0;

	virtual void				prepareResources		(void) = 0;
	virtual void				prepareDescriptors		(void) = 0;

	virtual void				commandsBeforeCompute	(const VkCommandBuffer	cmdBuffer) const = 0;
	virtual void				commandsAfterCompute	(const VkCommandBuffer	cmdBuffer) const = 0;

	virtual bool				verifyResult			(Allocation&	outputBufferAllocation) const = 0;

protected:
	const string				m_name;
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const TextureFormat			m_format;
	const AtomicOperation		m_operation;

	de::MovePtr<Buffer>			m_outputBuffer;
	Move<VkDescriptorPool>		m_descriptorPool;
	Move<VkDescriptorSetLayout>	m_descriptorSetLayout;
	Move<VkDescriptorSet>		m_descriptorSet;
	de::MovePtr<Image>			m_resultImage;
	Move<VkImageView>			m_resultImageView;
};
BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context&				context,
													const string&			name,
													const ImageType			imageType,
													const tcu::UVec3&		imageSize,
													const TextureFormat&	format,
													const AtomicOperation	operation)
	: vkt::TestInstance	(context)
	, m_name			(name)
	, m_imageType		(imageType)
	, m_imageSize		(imageSize)
	, m_format			(format)
	, m_operation		(operation)
{
}
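// iterate() drives the whole test: upload the per-operation initial value to every texel,
// transition the image, dispatch NUM_INVOCATIONS_PER_PIXEL invocations per pixel, copy the
// results back to a host-visible buffer and hand them to the subclass for verification.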
tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
{
	const VkDevice			device				= m_context.getDevice();
	const DeviceInterface&	deviceInterface		= m_context.getDeviceInterface();
	const VkQueue			queue				= m_context.getUniversalQueue();
	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	Allocator&				allocator			= m_context.getDefaultAllocator();
	const VkDeviceSize		imageSizeInBytes	= tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
	const VkDeviceSize		outBuffSizeInBytes	= getOutputBufferSize();

	const VkImageCreateInfo imageParams =
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,					// VkStructureType			sType;
		DE_NULL,												// const void*				pNext;
		(m_imageType == IMAGE_TYPE_CUBE ||
		 m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
		 (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
		 (VkImageCreateFlags)0u),								// VkImageCreateFlags		flags;
		mapImageType(m_imageType),								// VkImageType				imageType;
		mapTextureFormat(m_format),								// VkFormat					format;
		makeExtent3D(getLayerSize(m_imageType, m_imageSize)),	// VkExtent3D				extent;
		1u,														// deUint32					mipLevels;
		getNumLayers(m_imageType, m_imageSize),					// deUint32					arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,									// VkSampleCountFlagBits	samples;
		VK_IMAGE_TILING_OPTIMAL,								// VkImageTiling			tiling;
		VK_IMAGE_USAGE_STORAGE_BIT |
		VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
		VK_IMAGE_USAGE_TRANSFER_DST_BIT,						// VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,								// VkSharingMode			sharingMode;
		0u,														// deUint32					queueFamilyIndexCount;
		DE_NULL,												// const deUint32*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED,								// VkImageLayout			initialLayout;
	};

	// Create the image that is going to store results of atomic operations
	m_resultImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));

	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	m_resultImageView = makeImageView(deviceInterface, device, m_resultImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);

	// Prepare the buffer with the initial data for the image
	const Buffer inputBuffer(deviceInterface, device, allocator, makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);

	Allocation& inputBufferAllocation = inputBuffer.getAllocation();

	// Prepare the initial data for the image
	const tcu::IVec4 initialValue(getOperationInitialValue(m_operation));

	tcu::UVec3				gridSize = getShaderGridSize(m_imageType, m_imageSize);
	tcu::PixelBufferAccess	inputPixelBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), inputBufferAllocation.getHostPtr());

	for (deUint32 z = 0; z < gridSize.z(); z++)
	for (deUint32 y = 0; y < gridSize.y(); y++)
	for (deUint32 x = 0; x < gridSize.x(); x++)
	{
		inputPixelBuffer.setPixel(initialValue, x, y, z);
	}

	flushMappedMemoryRange(deviceInterface, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), imageSizeInBytes);

	// Create a buffer to store shader output copied from result image
	m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface, device, allocator, makeBufferCreateInfo(outBuffSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));

	prepareResources();
	prepareDescriptors();

	// Create pipeline
	const Unique<VkShaderModule>	shaderModule	(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
	const Unique<VkPipelineLayout>	pipelineLayout	(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
	const Unique<VkPipeline>		pipeline		(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));

	// Create command buffer
	const Unique<VkCommandPool>		cmdPool			(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
	const Unique<VkCommandBuffer>	cmdBuffer		(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	beginCommandBuffer(deviceInterface, *cmdBuffer);

	deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

	const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier
		= makeBufferMemoryBarrier(	VK_ACCESS_HOST_WRITE_BIT,
									VK_ACCESS_TRANSFER_READ_BIT,
									inputBuffer.get(),
									0ull,
									imageSizeInBytes);

	const VkImageMemoryBarrier resultImagePreCopyBarrier =
		makeImageMemoryBarrier(	0u,
								VK_ACCESS_TRANSFER_WRITE_BIT,
								VK_IMAGE_LAYOUT_UNDEFINED,
								VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
								m_resultImage->get(),
								subresourceRange);

	deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 1u, &inputBufferPostHostWriteBarrier, 1u, &resultImagePreCopyBarrier);

	const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));

	deviceInterface.cmdCopyBufferToImage(*cmdBuffer, *inputBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &bufferImageCopyParams);

	const VkImageMemoryBarrier resultImagePostCopyBarrier =
		makeImageMemoryBarrier(	VK_ACCESS_TRANSFER_WRITE_BIT,
								VK_ACCESS_SHADER_READ_BIT,
								VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
								VK_IMAGE_LAYOUT_GENERAL,
								m_resultImage->get(),
								subresourceRange);

	deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostCopyBarrier);

	commandsBeforeCompute(*cmdBuffer);

	deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());

	commandsAfterCompute(*cmdBuffer);

	const VkBufferMemoryBarrier outputBufferPreHostReadBarrier
		= makeBufferMemoryBarrier(	VK_ACCESS_TRANSFER_WRITE_BIT,
									VK_ACCESS_HOST_READ_BIT,
									m_outputBuffer->get(),
									0ull,
									outBuffSizeInBytes);

	deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, 0u, DE_NULL, 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);

	endCommandBuffer(deviceInterface, *cmdBuffer);

	submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer);

	Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();

	invalidateMappedMemoryRange(deviceInterface, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outBuffSizeInBytes);

	if (verifyResult(outputBufferAllocation))
		return tcu::TestStatus::pass("Comparison succeeded");
	else
		return tcu::TestStatus::fail("Comparison failed");
}
class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
{
public:
						BinaryAtomicEndResultInstance	(Context&				context,
														 const string&			name,
														 const ImageType		imageType,
														 const tcu::UVec3&		imageSize,
														 const TextureFormat&	format,
														 const AtomicOperation	operation)
							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}

	virtual deUint32	getOutputBufferSize		(void) const;

	virtual void		prepareResources		(void) {}
	virtual void		prepareDescriptors		(void);

	virtual void		commandsBeforeCompute	(const VkCommandBuffer) const {}
	virtual void		commandsAfterCompute	(const VkCommandBuffer	cmdBuffer) const;

	virtual bool		verifyResult			(Allocation&	outputBufferAllocation) const;
};
deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
{
	return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
}
void BinaryAtomicEndResultInstance::prepareDescriptors (void)
{
	const VkDevice			device			= m_context.getDevice();
	const DeviceInterface&	deviceInterface	= m_context.getDeviceInterface();

	m_descriptorSetLayout =
		DescriptorSetLayoutBuilder()
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(deviceInterface, device);

	m_descriptorPool =
		DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);

	const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);

	DescriptorSetUpdateBuilder()
		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
		.update(deviceInterface, device);
}
void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
{
	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	const VkImageMemoryBarrier resultImagePostDispatchBarrier =
		makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
								VK_ACCESS_TRANSFER_READ_BIT,
								VK_IMAGE_LAYOUT_GENERAL,
								VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
								m_resultImage->get(),
								subresourceRange);

	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostDispatchBarrier);

	const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));

	deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
}
bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation) const
{
	const UVec3	gridSize			= getShaderGridSize(m_imageType, m_imageSize);
	const IVec3	extendedGridSize	= IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());

	tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());

	for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
	for (deInt32 x = 0; x < resultBuffer.getWidth(); x++)
	{
		deInt32 resultValue = resultBuffer.getPixelInt(x, y, z).x();

		if (isOrderIndependentAtomicOperation(m_operation))
		{
			deInt32 reference = getOperationInitialValue(m_operation);

			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
			{
				const IVec3 gid(x + i*gridSize.x(), y, z);
				reference = computeBinaryAtomicOperationResult(m_operation, reference, getAtomicFuncArgument(m_operation, gid, extendedGridSize));
			}

			if (resultValue != reference)
				return false;
		}
		else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
		{
			// Check if the end result equals one of the atomic args.
			bool matchFound = false;

			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
			{
				const IVec3 gid(x + i*gridSize.x(), y, z);
				matchFound = (resultValue == getAtomicFuncArgument(m_operation, gid, extendedGridSize));
			}

			if (!matchFound)
				return false;
		}
		else
			DE_ASSERT(false);
	}

	return true;
}
TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
{
	return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
}
class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
{
public:
						BinaryAtomicIntermValuesInstance	(Context&				context,
															 const string&			name,
															 const ImageType		imageType,
															 const tcu::UVec3&		imageSize,
															 const TextureFormat&	format,
															 const AtomicOperation	operation)
							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}

	virtual deUint32	getOutputBufferSize		(void) const;

	virtual void		prepareResources		(void);
	virtual void		prepareDescriptors		(void);

	virtual void		commandsBeforeCompute	(const VkCommandBuffer	cmdBuffer) const;
	virtual void		commandsAfterCompute	(const VkCommandBuffer	cmdBuffer) const;

	virtual bool		verifyResult			(Allocation&	outputBufferAllocation) const;

protected:
	bool				verifyRecursive			(const deInt32	index,
												 const deInt32	valueSoFar,
												 bool			argsUsed[NUM_INVOCATIONS_PER_PIXEL],
												 const deInt32	atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
												 const deInt32	resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;

	de::MovePtr<Image>	m_intermResultsImage;
	Move<VkImageView>	m_intermResultsImageView;
};
deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
{
	return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
}
void BinaryAtomicIntermValuesInstance::prepareResources (void)
{
	const VkDevice			device			= m_context.getDevice();
	const DeviceInterface&	deviceInterface	= m_context.getDeviceInterface();
	Allocator&				allocator		= m_context.getDefaultAllocator();

	const UVec3	layerSize			= getLayerSize(m_imageType, m_imageSize);
	const bool	isCubeBasedImage	= (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
	const UVec3	extendedLayerSize	= isCubeBasedImage	? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
														: UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());

	const VkImageCreateInfo imageParams =
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,		// VkStructureType			sType;
		DE_NULL,									// const void*				pNext;
		(m_imageType == IMAGE_TYPE_CUBE ||
		 m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
		 (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
		 (VkImageCreateFlags)0u),					// VkImageCreateFlags		flags;
		mapImageType(m_imageType),					// VkImageType				imageType;
		mapTextureFormat(m_format),					// VkFormat					format;
		makeExtent3D(extendedLayerSize),			// VkExtent3D				extent;
		1u,											// deUint32					mipLevels;
		getNumLayers(m_imageType, m_imageSize),		// deUint32					arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,						// VkSampleCountFlagBits	samples;
		VK_IMAGE_TILING_OPTIMAL,					// VkImageTiling			tiling;
		VK_IMAGE_USAGE_STORAGE_BIT |
		VK_IMAGE_USAGE_TRANSFER_SRC_BIT,			// VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode			sharingMode;
		0u,											// deUint32					queueFamilyIndexCount;
		DE_NULL,									// const deUint32*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED,					// VkImageLayout			initialLayout;
	};

	m_intermResultsImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));

	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	m_intermResultsImageView = makeImageView(deviceInterface, device, m_intermResultsImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
}
void BinaryAtomicIntermValuesInstance::prepareDescriptors (void)
{
	const VkDevice			device			= m_context.getDevice();
	const DeviceInterface&	deviceInterface	= m_context.getDeviceInterface();

	m_descriptorSetLayout =
		DescriptorSetLayoutBuilder()
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(deviceInterface, device);

	m_descriptorPool =
		DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u)
		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);

	const VkDescriptorImageInfo	descResultImageInfo			= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
	const VkDescriptorImageInfo	descIntermResultsImageInfo	= makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);

	DescriptorSetUpdateBuilder()
		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descIntermResultsImageInfo)
		.update(deviceInterface, device);
}
void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
{
	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	const VkImageMemoryBarrier imagePreDispatchBarrier =
		makeImageMemoryBarrier(	0u,
								VK_ACCESS_SHADER_WRITE_BIT,
								VK_IMAGE_LAYOUT_UNDEFINED,
								VK_IMAGE_LAYOUT_GENERAL,
								m_intermResultsImage->get(),
								subresourceRange);

	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
}
void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
{
	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	const VkImageMemoryBarrier imagePostDispatchBarrier =
		makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
								VK_ACCESS_TRANSFER_READ_BIT,
								VK_IMAGE_LAYOUT_GENERAL,
								VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
								m_intermResultsImage->get(),
								subresourceRange);

	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);

	const UVec3				layerSize				= getLayerSize(m_imageType, m_imageSize);
	const UVec3				extendedLayerSize		= UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
	const VkBufferImageCopy	bufferImageCopyParams	= makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));

	deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
}
bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation& outputBufferAllocation) const
{
	const UVec3	gridSize			= getShaderGridSize(m_imageType, m_imageSize);
	const IVec3	extendedGridSize	= IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());

	tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());

	for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
	for (deUint32 x = 0; x < gridSize.x(); x++)
	{
		deInt32	resultValues[NUM_INVOCATIONS_PER_PIXEL];
		deInt32	atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
		bool	argsUsed[NUM_INVOCATIONS_PER_PIXEL];

		for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
		{
			IVec3 gid(x + i*gridSize.x(), y, z);

			resultValues[i]	= resultBuffer.getPixelInt(gid.x(), gid.y(), gid.z()).x();
			atomicArgs[i]	= getAtomicFuncArgument(m_operation, gid, extendedGridSize);
			argsUsed[i]		= false;
		}

		// Verify that the return values form a valid sequence.
		if (!verifyRecursive(0, getOperationInitialValue(m_operation), argsUsed, atomicArgs, resultValues))
		{
			return false;
		}
	}

	return true;
}
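// verifyRecursive searches, by backtracking, for an ordering of the atomic arguments such
// that each observed return value equals the running value right before that argument was
// applied. If such a permutation exists, the intermediate values are consistent with some
// legal serialization of the atomic operations on that pixel.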
bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32	index,
														const deInt32	valueSoFar,
														bool			argsUsed[NUM_INVOCATIONS_PER_PIXEL],
														const deInt32	atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
														const deInt32	resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
{
	if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
		return true;

	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
	{
		if (!argsUsed[i] && resultValues[i] == valueSoFar)
		{
			argsUsed[i] = true;

			if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
				return true;

			argsUsed[i] = false;
		}
	}

	return false;
}
TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
{
	return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
}
} // anonymous ns

tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));

	struct ImageParams
	{
		ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
			: m_imageType	(imageType)
			, m_imageSize	(imageSize)
		{
		}

		const ImageType		m_imageType;
		const tcu::UVec3	m_imageSize;
	};

	static const ImageParams imageParamsArray[] =
	{
		ImageParams(IMAGE_TYPE_1D,			tcu::UVec3(64u, 1u, 1u)),
		ImageParams(IMAGE_TYPE_1D_ARRAY,	tcu::UVec3(64u, 1u, 8u)),
		ImageParams(IMAGE_TYPE_2D,			tcu::UVec3(64u, 64u, 1u)),
		ImageParams(IMAGE_TYPE_2D_ARRAY,	tcu::UVec3(64u, 64u, 8u)),
		ImageParams(IMAGE_TYPE_3D,			tcu::UVec3(64u, 64u, 8u)),
		ImageParams(IMAGE_TYPE_CUBE,		tcu::UVec3(64u, 64u, 1u)),
		ImageParams(IMAGE_TYPE_CUBE_ARRAY,	tcu::UVec3(64u, 64u, 2u))
	};

	static const tcu::TextureFormat formats[] =
	{
		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32)
	};

	for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
	{
		const AtomicOperation operation = (AtomicOperation)operationI;

		de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));

		for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
		{
			const ImageType		imageType = imageParamsArray[imageTypeNdx].m_imageType;
			const tcu::UVec3	imageSize = imageParamsArray[imageTypeNdx].m_imageSize;

			de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));

			for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
			{
				const TextureFormat&	format		= formats[formatNdx];
				const std::string		formatName	= getShaderImageFormatQualifier(format);

				//!< Atomic case checks the end result of the operations, and not the intermediate return values
				const string caseEndResult = formatName + "_end_result";
				imageTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));

				//!< Atomic case checks the return values of the atomic function and not the end result.
				const string caseIntermValues = formatName + "_intermediate_values";
				imageTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
			}

			operationGroup->addChild(imageTypeGroup.release());
		}

		imageAtomicOperationsTests->addChild(operationGroup.release());
	}

	return imageAtomicOperationsTests.release();
}

} // image
} // vkt