1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2016 The Khronos Group Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 * \file vktImageAtomicOperationTests.cpp
21 * \brief Image atomic operation tests
22 *//*--------------------------------------------------------------------*/
24 #include "vktImageAtomicOperationTests.hpp"
26 #include "deUniquePtr.hpp"
27 #include "deStringUtil.hpp"
29 #include "vktTestCaseUtil.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkImageUtil.hpp"
32 #include "vktImageTestsUtil.hpp"
33 #include "vkBuilderUtil.hpp"
35 #include "vkRefUtil.hpp"
36 #include "vkTypeUtil.hpp"
38 #include "tcuTextureUtil.hpp"
39 #include "tcuTexture.hpp"
40 #include "tcuVectorType.hpp"
53 using tcu::TextureFormat;
64 using tcu::Texture2DArray;
65 using tcu::TextureCube;
66 using tcu::PixelBufferAccess;
67 using tcu::ConstPixelBufferAccess;
69 using tcu::TestContext;
// Number of shader invocations that target each pixel of the result image
// (the dispatch is widened by this factor along X; see iterate()).
73 NUM_INVOCATIONS_PER_PIXEL = 5u
// AtomicOperation enumerators (enum header and the MIN..XOR members are not
// visible in this listing; full set appears in the switch statements below).
78 ATOMIC_OPERATION_ADD = 0,
84 ATOMIC_OPERATION_EXCHANGE,
// Builds the GLSL coordinate expression used to index the image for the given
// image type: a bare x for buffer images, ivec2 for 1D-array/2D images and
// ivec3 for 2D-array/3D/cube types. (Some parameters and cases of this
// function are not visible in this listing.)
89 static string getCoordStr (const ImageType imageType,
97 case IMAGE_TYPE_BUFFER:
99 case IMAGE_TYPE_1D_ARRAY:
101 return string("ivec2(" + x + "," + y + ")");
102 case IMAGE_TYPE_2D_ARRAY:
104 case IMAGE_TYPE_CUBE:
105 case IMAGE_TYPE_CUBE_ARRAY:
106 return string("ivec3(" + x + "," + y + "," + z + ")");
// Returns the GLSL expression for the data argument passed to the atomic
// function. For commutative/associative ops it is x^2 + y^2 + z^2; for
// exchange it is a unique linearized invocation index so every invocation
// writes a distinct value. Must stay in sync with getAtomicFuncArgument().
113 static string getAtomicFuncArgumentShaderStr (const AtomicOperation op,
117 const IVec3& gridSize)
121 case ATOMIC_OPERATION_ADD:
122 case ATOMIC_OPERATION_MIN:
123 case ATOMIC_OPERATION_MAX:
124 case ATOMIC_OPERATION_AND:
125 case ATOMIC_OPERATION_OR:
126 case ATOMIC_OPERATION_XOR:
127 return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
128 case ATOMIC_OPERATION_EXCHANGE:
// Unique per-invocation value: (z*sizeX + x)*sizeY + y.
129 return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
// Maps an AtomicOperation to the lower-case test-case group name used when
// registering cases in createImageAtomicOperationTests().
136 static string getAtomicOperationCaseName (const AtomicOperation op)
140 case ATOMIC_OPERATION_ADD: return string("add");
141 case ATOMIC_OPERATION_MIN: return string("min");
142 case ATOMIC_OPERATION_MAX: return string("max");
143 case ATOMIC_OPERATION_AND: return string("and");
144 case ATOMIC_OPERATION_OR: return string("or");
145 case ATOMIC_OPERATION_XOR: return string("xor");
146 case ATOMIC_OPERATION_EXCHANGE: return string("exchange");
// Maps an AtomicOperation to the GLSL imageAtomic* built-in function name
// emitted into the generated compute shader.
153 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
157 case ATOMIC_OPERATION_ADD: return string("imageAtomicAdd");
158 case ATOMIC_OPERATION_MIN: return string("imageAtomicMin");
159 case ATOMIC_OPERATION_MAX: return string("imageAtomicMax");
160 case ATOMIC_OPERATION_AND: return string("imageAtomicAnd");
161 case ATOMIC_OPERATION_OR: return string("imageAtomicOr");
162 case ATOMIC_OPERATION_XOR: return string("imageAtomicXor");
163 case ATOMIC_OPERATION_EXCHANGE: return string("imageAtomicExchange");
// Initial pixel value written to the result image before the shader runs.
// Chosen so the operation is observable: MIN starts high so arguments can
// lower it; AND starts all-ones (within 15 bits) so arguments can clear bits.
170 static deInt32 getOperationInitialValue (const AtomicOperation op)
174 // \note 18 is just an arbitrary small nonzero value.
175 case ATOMIC_OPERATION_ADD: return 18;
176 case ATOMIC_OPERATION_MIN: return (1 << 15) - 1;
177 case ATOMIC_OPERATION_MAX: return 18;
178 case ATOMIC_OPERATION_AND: return (1 << 15) - 1;
179 case ATOMIC_OPERATION_OR: return 18;
180 case ATOMIC_OPERATION_XOR: return 18;
181 case ATOMIC_OPERATION_EXCHANGE: return 18;
// Host-side mirror of getAtomicFuncArgumentShaderStr(): computes the same
// atomic-function argument for a given invocation ID, so verification can
// reproduce the values the shader used.
188 static deInt32 getAtomicFuncArgument (const AtomicOperation op, const IVec3& invocationID, const IVec3& gridSize)
190 const int x = invocationID.x();
191 const int y = invocationID.y();
192 const int z = invocationID.z();
196 // \note Fall-throughs.
197 case ATOMIC_OPERATION_ADD:
198 case ATOMIC_OPERATION_MIN:
199 case ATOMIC_OPERATION_MAX:
200 case ATOMIC_OPERATION_AND:
201 case ATOMIC_OPERATION_OR:
202 case ATOMIC_OPERATION_XOR:
203 return x*x + y*y + z*z;
204 case ATOMIC_OPERATION_EXCHANGE:
// Unique linearized invocation index; matches the shader-side expression.
205 return (z*gridSize.x() + x)*gridSize.y() + y;
212 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
// Used by verifyResult(): order-independent ops can be checked against a
// single reference value; exchange requires a weaker check.
213 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
215 return op == ATOMIC_OPERATION_ADD ||
216 op == ATOMIC_OPERATION_MIN ||
217 op == ATOMIC_OPERATION_MAX ||
218 op == ATOMIC_OPERATION_AND ||
219 op == ATOMIC_OPERATION_OR ||
220 op == ATOMIC_OPERATION_XOR;
223 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
// Reference implementation of each operation, used to fold the per-invocation
// arguments into the expected end result during verification.
224 static deInt32 computeBinaryAtomicOperationResult (const AtomicOperation op, const deInt32 a, const deInt32 b)
228 case ATOMIC_OPERATION_ADD: return a + b;
229 case ATOMIC_OPERATION_MIN: return de::min(a, b);
230 case ATOMIC_OPERATION_MAX: return de::max(a, b);
231 case ATOMIC_OPERATION_AND: return a & b;
232 case ATOMIC_OPERATION_OR: return a | b;
233 case ATOMIC_OPERATION_XOR: return a ^ b;
234 case ATOMIC_OPERATION_EXCHANGE: return b;
// Test case that checks the END RESULT stored in the image after all atomic
// operations have completed (as opposed to the intermediate return values,
// which BinaryAtomicIntermValuesCase covers). A `name` parameter is passed to
// the ctor but its declaration line is not visible in this listing.
241 class BinaryAtomicEndResultCase : public vkt::TestCase
244 BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
246 const string& description,
247 const ImageType imageType,
248 const tcu::UVec3& imageSize,
249 const tcu::TextureFormat& format,
250 const AtomicOperation operation,
251 const glu::GLSLVersion glslVersion);
253 void initPrograms (SourceCollections& sourceCollections) const;
254 TestInstance* createInstance (Context& context) const;
257 const ImageType m_imageType;
258 const tcu::UVec3 m_imageSize;
259 const tcu::TextureFormat m_format;
260 const AtomicOperation m_operation;
261 const glu::GLSLVersion m_glslVersion;
// Ctor simply forwards name/description to TestCase and stores the test
// parameters. (The `name` parameter line and the m_format initializer are not
// visible in this listing.)
264 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
266 const string& description,
267 const ImageType imageType,
268 const tcu::UVec3& imageSize,
269 const tcu::TextureFormat& format,
270 const AtomicOperation operation,
271 const glu::GLSLVersion glslVersion)
272 : TestCase (testCtx, name, description)
273 , m_imageType (imageType)
274 , m_imageSize (imageSize)
276 , m_operation (operation)
277 , m_glslVersion (glslVersion)
// Generates the compute shader: one invocation per (gx, gy, gz), each
// performing a single imageAtomic* call on u_resultImage. The X coordinate is
// taken modulo the grid width so NUM_INVOCATIONS_PER_PIXEL invocations hit
// each pixel. The result of the atomic call is discarded here; only the end
// state of the image is inspected.
281 void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
283 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
285 const bool uintFormat = isUintFormat(mapTextureFormat(m_format));
286 const bool intFormat = isIntFormat(mapTextureFormat(m_format));
287 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
// Wrap X back into the pixel grid so multiple invocations contend per pixel.
288 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
// Cast the argument to the image's component type (uint/int).
290 const string atomicArgExpr = (uintFormat ? "uint" : intFormat ? "int" : "float")
291 + getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
293 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + ", " + atomicArgExpr + ")";
294 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
295 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
297 string source = versionDecl + "\n"
298 "precision highp " + shaderImageTypeStr + ";\n"
300 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
301 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
305 " int gx = int(gl_GlobalInvocationID.x);\n"
306 " int gy = int(gl_GlobalInvocationID.y);\n"
307 " int gz = int(gl_GlobalInvocationID.z);\n"
308 " " + atomicInvocation + ";\n"
311 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
// Test case that checks the INTERMEDIATE VALUES returned by the atomic
// function in each invocation (written out to a second image), rather than
// the final image contents. A `name` parameter is passed to the ctor but its
// declaration line is not visible in this listing.
314 class BinaryAtomicIntermValuesCase : public vkt::TestCase
317 BinaryAtomicIntermValuesCase (tcu::TestContext& testCtx,
319 const string& description,
320 const ImageType imageType,
321 const tcu::UVec3& imageSize,
322 const tcu::TextureFormat& format,
323 const AtomicOperation operation,
324 const glu::GLSLVersion glslVersion);
326 void initPrograms (SourceCollections& sourceCollections) const;
327 TestInstance* createInstance (Context& context) const;
330 const ImageType m_imageType;
331 const tcu::UVec3 m_imageSize;
332 const tcu::TextureFormat m_format;
333 const AtomicOperation m_operation;
334 const glu::GLSLVersion m_glslVersion;
// Ctor forwards name/description to TestCase and stores the test parameters.
// (The `name` parameter line and the m_format initializer are not visible in
// this listing.)
337 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext& testCtx,
339 const string& description,
340 const ImageType imageType,
341 const tcu::UVec3& imageSize,
342 const TextureFormat& format,
343 const AtomicOperation operation,
344 const glu::GLSLVersion glslVersion)
345 : TestCase (testCtx, name, description)
346 , m_imageType (imageType)
347 , m_imageSize (imageSize)
349 , m_operation (operation)
350 , m_glslVersion (glslVersion)
// Generates the compute shader: like the end-result variant, each invocation
// performs one atomic on u_resultImage (binding 0), but here the value
// RETURNED by the atomic call is stored into u_intermValuesImage (binding 1)
// at the un-wrapped invocation coordinate, so every invocation's observed
// pre-op value can be verified on the host.
354 void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
356 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
358 const bool uintFormat = isUintFormat(mapTextureFormat(m_format));
359 const bool intFormat = isIntFormat(mapTextureFormat(m_format));
360 const string colorVecTypeName = string(uintFormat ? "u" : intFormat ? "i" : "") + "vec4";
361 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
// Atomic target wraps X into the grid; the store destination does not.
362 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
363 const string invocationCoord = getCoordStr(m_imageType, "gx", "gy", "gz");
364 const string atomicArgExpr = (uintFormat ? "uint" : intFormat ? "int" : "float")
365 + getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
367 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + ", " + atomicArgExpr + ")";
368 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
369 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
371 string source = versionDecl + "\n"
372 "precision highp " + shaderImageTypeStr + ";\n"
374 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
375 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
376 "layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
380 " int gx = int(gl_GlobalInvocationID.x);\n"
381 " int gy = int(gl_GlobalInvocationID.y);\n"
382 " int gz = int(gl_GlobalInvocationID.z);\n"
383 " imageStore(u_intermValuesImage, " + invocationCoord + ", " + colorVecTypeName + "(" + atomicInvocation + "));\n"
386 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
// Common test-instance logic shared by the end-result and intermediate-values
// variants: owns the result image/view, descriptor objects and the readback
// buffer, and drives the whole submit/verify flow in iterate(). Subclasses
// supply resource setup, extra commands around the dispatch, and verification.
389 class BinaryAtomicInstanceBase : public vkt::TestInstance
393 BinaryAtomicInstanceBase (Context& context,
395 const ImageType imageType,
396 const tcu::UVec3& imageSize,
397 const TextureFormat& format,
398 const AtomicOperation operation);
400 tcu::TestStatus iterate (void);
// Size of the host-visible buffer the subclass wants results copied into.
402 virtual deUint32 getOutputBufferSize (void) const = 0;
404 virtual void prepareResources (void) = 0;
405 virtual void prepareDescriptors (void) = 0;
// Hooks recorded into the command buffer around the dispatch.
407 virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const = 0;
408 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer) const = 0;
410 virtual bool verifyResult (Allocation& outputBufferAllocation) const = 0;
414 const ImageType m_imageType;
415 const tcu::UVec3 m_imageSize;
416 const TextureFormat m_format;
417 const AtomicOperation m_operation;
419 de::MovePtr<Buffer> m_outputBuffer;
420 Move<VkDescriptorPool> m_descriptorPool;
421 Move<VkDescriptorSetLayout> m_descriptorSetLayout;
422 Move<VkDescriptorSet> m_descriptorSet;
423 de::MovePtr<Image> m_resultImage;
424 Move<VkImageView> m_resultImageView;
// Stores the test parameters; no Vulkan objects are created until iterate().
// (A `name` parameter and the m_format initializer are not visible in this
// listing.)
427 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context& context,
429 const ImageType imageType,
430 const tcu::UVec3& imageSize,
431 const TextureFormat& format,
432 const AtomicOperation operation)
433 : vkt::TestInstance (context)
435 , m_imageType (imageType)
436 , m_imageSize (imageSize)
438 , m_operation (operation)
// Full test flow: create the result image, upload the per-operation initial
// value, record barriers + subclass hooks + the dispatch, copy results back
// to a host-visible buffer and hand them to the subclass for verification.
442 tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
444 const VkDevice device = m_context.getDevice();
445 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
446 const VkQueue queue = m_context.getUniversalQueue();
447 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
448 Allocator& allocator = m_context.getDefaultAllocator();
449 const VkDeviceSize imageSizeInBytes = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
450 const VkDeviceSize outBuffSizeInBytes = getOutputBufferSize();
452 const VkImageCreateInfo imageParams =
454 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
455 DE_NULL, // const void* pNext;
// Cube(-array) views require the cube-compatible flag on the image.
456 (m_imageType == IMAGE_TYPE_CUBE ||
457 m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
458 (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
459 (VkImageCreateFlags)0u), // VkImageCreateFlags flags;
460 mapImageType(m_imageType), // VkImageType imageType;
461 mapTextureFormat(m_format), // VkFormat format;
462 makeExtent3D(getLayerSize(m_imageType, m_imageSize)), // VkExtent3D extent;
463 1u, // deUint32 mipLevels;
464 getNumLayers(m_imageType, m_imageSize), // deUint32 arrayLayers;
465 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
466 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
// Storage for the shader atomics, plus transfer src/dst for upload/readback.
467 VK_IMAGE_USAGE_STORAGE_BIT |
468 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
469 VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
470 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
471 0u, // deUint32 queueFamilyIndexCount;
472 DE_NULL, // const deUint32* pQueueFamilyIndices;
473 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
476 //Create the image that is going to store results of atomic operations
477 m_resultImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));
479 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
481 m_resultImageView = makeImageView(deviceInterface, device, m_resultImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
483 //Prepare the buffer with the initial data for the image
484 const Buffer inputBuffer(deviceInterface, device, allocator, makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
486 Allocation& inputBufferAllocation = inputBuffer.getAllocation();
488 //Prepare the initial data for the image
489 const tcu::IVec4 initialValue(getOperationInitialValue(m_operation));
491 tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
492 tcu::PixelBufferAccess inputPixelBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), inputBufferAllocation.getHostPtr());
// Fill every pixel of the staging buffer with the operation's initial value.
494 for (deUint32 z = 0; z < gridSize.z(); z++)
495 for (deUint32 y = 0; y < gridSize.y(); y++)
496 for (deUint32 x = 0; x < gridSize.x(); x++)
498 inputPixelBuffer.setPixel(initialValue, x, y, z);
// Make host writes visible to the device before the copy.
501 flushMappedMemoryRange(deviceInterface, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), imageSizeInBytes);
503 // Create a buffer to store shader output copied from result image
504 m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface, device, allocator, makeBufferCreateInfo(outBuffSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
508 prepareDescriptors();
511 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
512 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
513 const Unique<VkPipeline> pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
515 // Create command buffer
516 const Unique<VkCommandPool> cmdPool(makeCommandPool(deviceInterface, device, queueFamilyIndex));
517 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(deviceInterface, device, *cmdPool));
519 beginCommandBuffer(deviceInterface, *cmdBuffer);
521 deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
522 deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
// Host write -> transfer read on the staging buffer; undefined -> transfer-dst
// on the result image, before the buffer-to-image copy.
524 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier =
525 makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT,
526 VK_ACCESS_TRANSFER_READ_BIT,
531 const VkImageMemoryBarrier resultImagePreCopyBarrier =
532 makeImageMemoryBarrier( 0u,
533 VK_ACCESS_TRANSFER_WRITE_BIT,
534 VK_IMAGE_LAYOUT_UNDEFINED,
535 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
536 m_resultImage->get(),
539 deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 1u, &inputBufferPostHostWriteBarrier, 1u, &resultImagePreCopyBarrier);
541 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));
543 deviceInterface.cmdCopyBufferToImage(*cmdBuffer, *inputBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &bufferImageCopyParams);
// Transition the initialized image to GENERAL for shader storage access.
545 const VkImageMemoryBarrier resultImagePostCopyBarrier =
546 makeImageMemoryBarrier( VK_ACCESS_TRANSFER_WRITE_BIT,
547 VK_ACCESS_SHADER_READ_BIT,
548 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
549 VK_IMAGE_LAYOUT_GENERAL,
550 m_resultImage->get(),
553 deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostCopyBarrier);
555 commandsBeforeCompute(*cmdBuffer);
// X dimension widened so NUM_INVOCATIONS_PER_PIXEL invocations hit each pixel.
557 deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
559 commandsAfterCompute(*cmdBuffer);
// Make the transfer-written output buffer visible to host reads.
561 const VkBufferMemoryBarrier outputBufferPreHostReadBarrier
562 = makeBufferMemoryBarrier( VK_ACCESS_TRANSFER_WRITE_BIT,
563 VK_ACCESS_HOST_READ_BIT,
564 m_outputBuffer->get(),
568 deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, 0u, DE_NULL, 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
570 endCommandBuffer(deviceInterface, *cmdBuffer);
572 submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer);
574 Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();
576 invalidateMappedMemoryRange(deviceInterface, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outBuffSizeInBytes);
578 if (verifyResult(outputBufferAllocation))
579 return tcu::TestStatus::pass("Comparison succeeded");
581 return tcu::TestStatus::fail("Comparison failed");
// Instance for the end-result variant: verifies the final contents of the
// result image itself, so no extra resources are needed and only a single
// storage-image descriptor is bound.
584 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
588 BinaryAtomicEndResultInstance (Context& context,
590 const ImageType imageType,
591 const tcu::UVec3& imageSize,
592 const TextureFormat& format,
593 const AtomicOperation operation)
594 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}
596 virtual deUint32 getOutputBufferSize (void) const;
// No additional resources/commands needed before the dispatch.
598 virtual void prepareResources (void) {}
599 virtual void prepareDescriptors (void);
601 virtual void commandsBeforeCompute (const VkCommandBuffer) const {}
602 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer) const;
604 virtual bool verifyResult (Allocation& outputBufferAllocation) const;
// Readback buffer holds one pixel per grid cell of the result image.
607 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
609 return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
// Sets up a single storage-image descriptor (binding 0) pointing at the
// result image view in GENERAL layout.
612 void BinaryAtomicEndResultInstance::prepareDescriptors (void)
614 const VkDevice device = m_context.getDevice();
615 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
617 m_descriptorSetLayout =
618 DescriptorSetLayoutBuilder()
619 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
620 .build(deviceInterface, device);
623 DescriptorPoolBuilder()
624 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
625 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
627 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
629 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
631 DescriptorSetUpdateBuilder()
632 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
633 .update(deviceInterface, device);
// After the dispatch: transition the result image from GENERAL to
// TRANSFER_SRC (guarding shader writes against the transfer read), then copy
// its contents into the host-visible output buffer.
636 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
638 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
639 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
641 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
642 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
643 VK_ACCESS_TRANSFER_READ_BIT,
644 VK_IMAGE_LAYOUT_GENERAL,
645 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
646 m_resultImage->get(),
649 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostDispatchBarrier);
651 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));
653 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
// Verifies the final image contents. For order-independent ops the result
// must equal the fold of all per-invocation arguments over the initial value;
// for exchange the last writer is unknown, so the result need only match one
// of the invocations' arguments. (Some control-flow lines of this function
// are not visible in this listing.)
656 bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation) const
658 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
659 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
661 tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
663 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
664 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
665 for (deInt32 x = 0; x < resultBuffer.getWidth(); x++)
667 deInt32 resultValue = resultBuffer.getPixelInt(x, y, z).x();
669 if (isOrderIndependentAtomicOperation(m_operation))
671 deInt32 reference = getOperationInitialValue(m_operation);
// Fold in the argument of each of the NUM_INVOCATIONS_PER_PIXEL invocations
// that targeted this pixel (they live at x + i*gridSize.x()).
673 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
675 const IVec3 gid(x + i*gridSize.x(), y, z);
676 reference = computeBinaryAtomicOperationResult(m_operation, reference, getAtomicFuncArgument(m_operation, gid, extendedGridSize));
679 if (resultValue != reference)
682 else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
684 // Check if the end result equals one of the atomic args.
685 bool matchFound = false;
687 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
689 const IVec3 gid(x + i*gridSize.x(), y, z);
690 matchFound = (resultValue == getAtomicFuncArgument(m_operation, gid, extendedGridSize));
// Factory: instantiate the end-result test instance with this case's params.
702 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
704 return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
// Instance for the intermediate-values variant: adds a second, X-extended
// image that receives the value returned by each atomic call, and verifies
// that those returned values form a valid serialization of the operations.
707 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
711 BinaryAtomicIntermValuesInstance (Context& context,
713 const ImageType imageType,
714 const tcu::UVec3& imageSize,
715 const TextureFormat& format,
716 const AtomicOperation operation)
717 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}
719 virtual deUint32 getOutputBufferSize (void) const;
721 virtual void prepareResources (void);
722 virtual void prepareDescriptors (void);
724 virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const;
725 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer) const;
727 virtual bool verifyResult (Allocation& outputBufferAllocation) const;
// Backtracking search over orderings of the atomic ops; see definition below.
731 bool verifyRecursive (const deInt32 index,
732 const deInt32 valueSoFar,
733 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
734 const deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
735 const deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
// Image (and view) holding each invocation's atomic return value.
736 de::MovePtr<Image> m_intermResultsImage;
737 Move<VkImageView> m_intermResultsImageView;
// Readback holds one value per invocation, i.e. NUM_INVOCATIONS_PER_PIXEL
// values for every pixel of the grid.
740 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
742 return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
// Creates the intermediate-results image: same layout as the result image but
// widened NUM_INVOCATIONS_PER_PIXEL times along X so each invocation gets its
// own texel. Only storage + transfer-src usage is needed (it is never a copy
// destination).
745 void BinaryAtomicIntermValuesInstance::prepareResources (void)
747 const VkDevice device = m_context.getDevice();
748 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
749 Allocator& allocator = m_context.getDefaultAllocator();
751 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
752 const UVec3 extendedLayerSize = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
754 const VkImageCreateInfo imageParams =
756 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
757 DE_NULL, // const void* pNext;
758 (m_imageType == IMAGE_TYPE_CUBE ||
759 m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
760 (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
761 (VkImageCreateFlags)0u), // VkImageCreateFlags flags;
762 mapImageType(m_imageType), // VkImageType imageType;
763 mapTextureFormat(m_format), // VkFormat format;
764 makeExtent3D(extendedLayerSize), // VkExtent3D extent;
765 1u, // deUint32 mipLevels;
766 getNumLayers(m_imageType, m_imageSize), // deUint32 arrayLayers;
767 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
768 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
769 VK_IMAGE_USAGE_STORAGE_BIT |
770 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
771 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
772 0u, // deUint32 queueFamilyIndexCount;
773 DE_NULL, // const deUint32* pQueueFamilyIndices;
774 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
777 m_intermResultsImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));
779 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
781 m_intermResultsImageView = makeImageView(deviceInterface, device, m_intermResultsImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
// Sets up two storage-image descriptors: binding 0 is the atomic target
// image, binding 1 the write-only intermediate-results image.
784 void BinaryAtomicIntermValuesInstance::prepareDescriptors (void)
786 const VkDevice device = m_context.getDevice();
787 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
789 m_descriptorSetLayout =
790 DescriptorSetLayoutBuilder()
791 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
792 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
793 .build(deviceInterface, device);
796 DescriptorPoolBuilder()
797 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u)
798 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
800 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
802 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
803 const VkDescriptorImageInfo descIntermResultsImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
805 DescriptorSetUpdateBuilder()
806 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
807 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descIntermResultsImageInfo)
808 .update(deviceInterface, device);
// Before the dispatch: transition the intermediate-results image from
// UNDEFINED to GENERAL so the shader can imageStore into it (its previous
// contents are irrelevant, hence srcAccessMask 0 and UNDEFINED old layout).
811 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
813 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
814 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
816 const VkImageMemoryBarrier imagePreDispatchBarrier =
817 makeImageMemoryBarrier( 0u,
818 VK_ACCESS_SHADER_WRITE_BIT,
819 VK_IMAGE_LAYOUT_UNDEFINED,
820 VK_IMAGE_LAYOUT_GENERAL,
821 m_intermResultsImage->get(),
824 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
// After the dispatch: transition the intermediate-results image to
// TRANSFER_SRC and copy the full X-extended image into the output buffer for
// host-side verification.
827 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
829 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
830 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
832 const VkImageMemoryBarrier imagePostDispatchBarrier =
833 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
834 VK_ACCESS_TRANSFER_READ_BIT,
835 VK_IMAGE_LAYOUT_GENERAL,
836 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
837 m_intermResultsImage->get(),
840 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
842 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
843 const UVec3 extendedLayerSize = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
844 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
846 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
// For each pixel, collects the NUM_INVOCATIONS_PER_PIXEL returned values and
// the corresponding atomic arguments, then checks via backtracking search
// (verifyRecursive) that some ordering of the operations reproduces exactly
// those returned values. (Some control-flow lines of this function are not
// visible in this listing.)
849 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation& outputBufferAllocation) const
851 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
852 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
854 tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
856 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
857 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
858 for (deUint32 x = 0; x < gridSize.x(); x++)
860 deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL];
861 deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
862 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL];
// Gather the i-th invocation's returned value (stored at x + i*width) and
// recompute the argument it passed to the atomic function.
864 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
866 IVec3 gid(x + i*gridSize.x(), y, z);
868 resultValues[i] = resultBuffer.getPixelInt(gid.x(), gid.y(), gid.z()).x();
869 atomicArgs[i] = getAtomicFuncArgument(m_operation, gid, extendedGridSize);
873 // Verify that the return values form a valid sequence.
874 if (!verifyRecursive(0, getOperationInitialValue(m_operation), argsUsed, atomicArgs, resultValues))
// Backtracking search: tries to assign the remaining invocations, in some
// order, such that each invocation whose returned value equals the current
// accumulated value extends the chain; succeeds once all
// NUM_INVOCATIONS_PER_PIXEL invocations are placed. (Backtracking reset of
// argsUsed and the failure return are not visible in this listing.)
883 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32 index,
884 const deInt32 valueSoFar,
885 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
886 const deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
887 const deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
// All invocations placed: valid serialization found.
889 if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
892 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
// Candidate i fits next if its returned (pre-op) value matches the
// accumulated value so far.
894 if (!argsUsed[i] && resultValues[i] == valueSoFar)
898 if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
// Factory: instantiate the intermediate-values test instance with this
// case's params.
910 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
912 return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
// Builds the "atomic_operations" test group: for every atomic operation,
// image type/size and format (r32ui/r32i), registers both an end-result case
// and an intermediate-values case, compiled as GLSL 4.40.
917 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
919 de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
923 ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
924 : m_imageType (imageType)
925 , m_imageSize (imageSize)
928 const ImageType m_imageType;
929 const tcu::UVec3 m_imageSize;
// Image-type / size combinations under test.
932 static const ImageParams imageParamsArray[] =
934 ImageParams(IMAGE_TYPE_1D, tcu::UVec3(64u, 1u, 1u)),
935 ImageParams(IMAGE_TYPE_1D_ARRAY, tcu::UVec3(64u, 1u, 8u)),
936 ImageParams(IMAGE_TYPE_2D, tcu::UVec3(64u, 64u, 1u)),
937 ImageParams(IMAGE_TYPE_2D_ARRAY, tcu::UVec3(64u, 64u, 8u)),
938 ImageParams(IMAGE_TYPE_3D, tcu::UVec3(64u, 64u, 8u)),
939 ImageParams(IMAGE_TYPE_CUBE, tcu::UVec3(64u, 64u, 1u)),
940 ImageParams(IMAGE_TYPE_CUBE_ARRAY, tcu::UVec3(64u, 64u, 2u))
// 32-bit unsigned/signed single-channel formats — the formats that support
// image atomics in the generated shaders.
943 static const tcu::TextureFormat formats[] =
945 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
946 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32)
949 for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
951 const AtomicOperation operation = (AtomicOperation)operationI;
953 de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
955 for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
957 const ImageType imageType = imageParamsArray[imageTypeNdx].m_imageType;
958 const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
960 for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
962 const TextureFormat& format = formats[formatNdx];
963 const std::string formatName = getShaderImageFormatQualifier(format);
965 //!< Atomic case checks the end result of the operations, and not the intermediate return values
966 const string caseEndResult = getImageTypeName(imageType) + "_" + formatName + "_end_result";
967 operationGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
969 //!< Atomic case checks the return values of the atomic function and not the end result.
970 const string caseIntermValues = getImageTypeName(imageType) + "_" + formatName + "_intermediate_values";
971 operationGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
975 imageAtomicOperationsTests->addChild(operationGroup.release());
978 return imageAtomicOperationsTests.release();