1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2015 Google Inc.
6 * Copyright (c) 2015 Mobica Ltd.
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and/or associated documentation files (the
10 * "Materials"), to deal in the Materials without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sublicense, and/or sell copies of the Materials, and to
13 * permit persons to whom the Materials are furnished to do so, subject to
14 * the following conditions:
16 * The above copyright notice(s) and this permission notice shall be included
17 * in all copies or substantial portions of the Materials.
19 * The Materials are Confidential Information as defined by the
20 * Khronos Membership Agreement until designated non-confidential by Khronos,
21 * at which point this condition clause shall be removed.
23 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
26 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
27 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
28 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
29 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
33 * \brief Compute Shader Tests
34 *//*--------------------------------------------------------------------*/
36 #include "vktComputeBasicComputeShaderTests.hpp"
37 #include "vktTestCase.hpp"
38 #include "vktTestCaseUtil.hpp"
39 #include "vktComputeTestsUtil.hpp"
43 #include "vkRefUtil.hpp"
44 #include "vkPlatform.hpp"
45 #include "vkPrograms.hpp"
46 #include "vkRefUtil.hpp"
47 #include "vkMemUtil.hpp"
48 #include "vkQueryUtil.hpp"
49 #include "vkBuilderUtil.hpp"
50 #include "vkTypeUtil.hpp"
52 #include "deStringUtil.hpp"
53 #include "deUniquePtr.hpp"
54 #include "deRandom.hpp"
// Returns the product of all components of a tcu vector, e.g. the total
// number of invocations given a (x, y, z) local or dispatch size.
67 template<typename T, int size>
68 T multiplyComponents (const tcu::Vector<T, size>& v)
71 	for (int i = 0; i < size; ++i)
// NOTE(review): accumulator/return statements are elided in this view;
// presumably multiplies v[i] into a running product -- confirm in full file.
// Returns a*a; used by host-side result verification to mirror the shader's
// localOffs*localOffs computation.
77 inline T squared (const T& a)
// Builds a VkImageCreateInfo for a single-mip, single-layer 2D R32_UINT image
// of the given size, optimal tiling, exclusive sharing, undefined initial
// layout. Caller supplies the usage flags (e.g. TRANSFER_DST | STORAGE).
82 inline VkImageCreateInfo make2DImageCreateInfo (const tcu::IVec2& imageSize, const VkImageUsageFlags usage)
84 const VkImageCreateInfo imageParams =
86 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
87 DE_NULL, // const void* pNext;
88 0u, // VkImageCreateFlags flags;
89 VK_IMAGE_TYPE_2D, // VkImageType imageType;
90 VK_FORMAT_R32_UINT, // VkFormat format;
91 vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), // VkExtent3D extent;
92 1u, // deUint32 mipLevels;
93 1u, // deUint32 arrayLayers;
94 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
95 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
96 usage, // VkImageUsageFlags usage;
97 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
98 0u, // deUint32 queueFamilyIndexCount;
99 DE_NULL, // const deUint32* pQueueFamilyIndices;
100 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
// Convenience wrapper: whole-image copy region (one layer) for a 2D image.
105 inline VkBufferImageCopy makeBufferImageCopy(const tcu::IVec2& imageSize)
107 return compute::makeBufferImageCopy(vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), 1u);
// Tests communication between invocations of a workgroup through a "shared"
// array: each invocation writes a value into a mirrored slot of the shared
// array, synchronizes, then reads its own slot back and stores it to an SSBO
// which the host validates.
116 class SharedVarTest : public vkt::TestCase
119 SharedVarTest (tcu::TestContext& testCtx,
120 const std::string& name,
121 const std::string& description,
122 const tcu::IVec3& localSize,
123 const tcu::IVec3& workSize);
125 void initPrograms (SourceCollections& sourceCollections) const;
126 TestInstance* createInstance (Context& context) const;
129 const tcu::IVec3 m_localSize;
130 const tcu::IVec3 m_workSize;
// Per-test-run instance: records and submits the compute dispatch, then
// checks the output buffer on the host.
133 class SharedVarTestInstance : public vkt::TestInstance
136 SharedVarTestInstance (Context& context,
137 const tcu::IVec3& localSize,
138 const tcu::IVec3& workSize);
140 tcu::TestStatus iterate (void);
143 const tcu::IVec3 m_localSize;
144 const tcu::IVec3 m_workSize;
147 SharedVarTest::SharedVarTest (tcu::TestContext& testCtx,
148 const std::string& name,
149 const std::string& description,
150 const tcu::IVec3& localSize,
151 const tcu::IVec3& workSize)
152 : TestCase (testCtx, name, description)
153 , m_localSize (localSize)
154 , m_workSize (workSize)
// Generates the GLSL ES 3.10 compute shader. Output SSBO holds one uint per
// invocation across the whole dispatch (workGroupSize * workGroupCount).
158 void SharedVarTest::initPrograms (SourceCollections& sourceCollections) const
160 const int workGroupSize = multiplyComponents(m_localSize);
161 const int workGroupCount = multiplyComponents(m_workSize);
162 const int numValues = workGroupSize * workGroupCount;
164 std::ostringstream src;
165 src << "#version 310 es\n"
166 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
167 << "layout(binding = 0) writeonly buffer Output {\n"
168 << " uint values[" << numValues << "];\n"
170 << "shared uint offsets[" << workGroupSize << "];\n\n"
171 << "void main (void) {\n"
172 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
173 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
174 << " uint globalOffs = localSize*globalNdx;\n"
175 << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
// Each invocation writes into the slot mirrored around the array center, so
// every value read below was produced by a *different* invocation.
177 << " offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
178 << " memoryBarrierShared();\n"
// NOTE(review): a barrier() call between write and read appears to be on an
// elided line here -- memoryBarrierShared() alone does not order execution.
180 << " sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
183 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
186 TestInstance* SharedVarTest::createInstance (Context& context) const
188 return new SharedVarTestInstance(context, m_localSize, m_workSize);
191 SharedVarTestInstance::SharedVarTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
192 : TestInstance (context)
193 , m_localSize (localSize)
194 , m_workSize (workSize)
// Runs the test: creates buffer/descriptors/pipeline, dispatches the full
// workSize grid, waits for completion and validates every output value.
198 tcu::TestStatus SharedVarTestInstance::iterate (void)
200 const DeviceInterface& vk = m_context.getDeviceInterface();
201 const VkDevice device = m_context.getDevice();
202 const VkQueue queue = m_context.getUniversalQueue();
203 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
204 Allocator& allocator = m_context.getDefaultAllocator();
206 const int workGroupSize = multiplyComponents(m_localSize);
207 const int workGroupCount = multiplyComponents(m_workSize);
209 // Create a buffer and host-visible memory for it
211 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
212 const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
214 // Create descriptor set
216 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
217 DescriptorSetLayoutBuilder()
218 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
221 const Unique<VkDescriptorPool> descriptorPool(
222 DescriptorPoolBuilder()
223 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
224 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
226 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
228 const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
229 DescriptorSetUpdateBuilder()
230 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
233 // Perform the computation
235 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
236 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
237 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
// Barrier makes the shader writes visible to host reads after submission.
239 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
240 const void* barriers[] = { &computeFinishBarrier };
242 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
243 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
245 // Start recording commands
247 beginCommandBuffer(vk, *cmdBuffer);
249 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
250 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
252 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
254 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriers), barriers);
256 endCommandBuffer(vk, *cmdBuffer);
258 // Wait for completion
260 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
262 // Validate the results
264 const Allocation& bufferAllocation = buffer.getAllocation();
265 invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
267 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
269 for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
271 const int globalOffset = groupNdx * workGroupSize;
272 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
274 const deUint32 res = bufferPtr[globalOffset + localOffset];
// Mirrors the shader: slot localOffset was written by the invocation with
// index (workGroupSize - localOffset - 1), holding globalOffs + index^2.
275 const deUint32 ref = globalOffset + squared(workGroupSize - localOffset - 1);
279 std::ostringstream msg;
280 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
281 return tcu::TestStatus::fail(msg.str());
285 return tcu::TestStatus::pass("Compute succeeded");
// Tests atomic operations on a "shared" workgroup variable: every invocation
// does atomicAdd on a shared counter and writes (oldValue + 1) to the output
// slot selected by the value it received, so each group's output region must
// end up containing 1..workGroupSize in order.
288 class SharedVarAtomicOpTest : public vkt::TestCase
291 SharedVarAtomicOpTest (tcu::TestContext& testCtx,
292 const std::string& name,
293 const std::string& description,
294 const tcu::IVec3& localSize,
295 const tcu::IVec3& workSize);
297 void initPrograms (SourceCollections& sourceCollections) const;
298 TestInstance* createInstance (Context& context) const;
301 const tcu::IVec3 m_localSize;
302 const tcu::IVec3 m_workSize;
// Per-test-run instance: dispatch + host-side validation.
305 class SharedVarAtomicOpTestInstance : public vkt::TestInstance
308 SharedVarAtomicOpTestInstance (Context& context,
309 const tcu::IVec3& localSize,
310 const tcu::IVec3& workSize);
312 tcu::TestStatus iterate (void);
315 const tcu::IVec3 m_localSize;
316 const tcu::IVec3 m_workSize;
319 SharedVarAtomicOpTest::SharedVarAtomicOpTest (tcu::TestContext& testCtx,
320 const std::string& name,
321 const std::string& description,
322 const tcu::IVec3& localSize,
323 const tcu::IVec3& workSize)
324 : TestCase (testCtx, name, description)
325 , m_localSize (localSize)
326 , m_workSize (workSize)
// Generates the compute shader: one shared counter per workgroup, one output
// uint per invocation in the whole dispatch.
330 void SharedVarAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
332 const int workGroupSize = multiplyComponents(m_localSize);
333 const int workGroupCount = multiplyComponents(m_workSize);
334 const int numValues = workGroupSize * workGroupCount;
336 std::ostringstream src;
337 src << "#version 310 es\n"
338 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
339 << "layout(binding = 0) writeonly buffer Output {\n"
340 << " uint values[" << numValues << "];\n"
342 << "shared uint count;\n\n"
343 << "void main (void) {\n"
344 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
345 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
346 << " uint globalOffs = localSize*globalNdx;\n"
// NOTE(review): initialization of 'count' (presumably to 0 by one invocation,
// plus a barrier) appears to be on elided lines here -- confirm in full file.
349 << " memoryBarrierShared();\n"
// atomicAdd hands each invocation a unique slot index 0..localSize-1.
351 << " uint oldVal = atomicAdd(count, 1u);\n"
352 << " sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
355 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
358 TestInstance* SharedVarAtomicOpTest::createInstance (Context& context) const
360 return new SharedVarAtomicOpTestInstance(context, m_localSize, m_workSize);
363 SharedVarAtomicOpTestInstance::SharedVarAtomicOpTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
364 : TestInstance (context)
365 , m_localSize (localSize)
366 , m_workSize (workSize)
// Runs the test: same buffer/descriptor/pipeline/dispatch skeleton as
// SharedVarTestInstance::iterate, but the expected value per slot is its
// (local index + 1) within the workgroup.
370 tcu::TestStatus SharedVarAtomicOpTestInstance::iterate (void)
372 const DeviceInterface& vk = m_context.getDeviceInterface();
373 const VkDevice device = m_context.getDevice();
374 const VkQueue queue = m_context.getUniversalQueue();
375 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
376 Allocator& allocator = m_context.getDefaultAllocator();
378 const int workGroupSize = multiplyComponents(m_localSize);
379 const int workGroupCount = multiplyComponents(m_workSize);
381 // Create a buffer and host-visible memory for it
383 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
384 const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
386 // Create descriptor set
388 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
389 DescriptorSetLayoutBuilder()
390 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
393 const Unique<VkDescriptorPool> descriptorPool(
394 DescriptorPoolBuilder()
395 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
396 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
398 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
400 const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
401 DescriptorSetUpdateBuilder()
402 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
405 // Perform the computation
407 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
408 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
409 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
// Make shader writes visible to host reads after submission.
411 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
412 const void* barriers[] = { &computeFinishBarrier };
414 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
415 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
417 // Start recording commands
419 beginCommandBuffer(vk, *cmdBuffer);
421 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
422 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
424 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
426 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriers), barriers);
428 endCommandBuffer(vk, *cmdBuffer);
430 // Wait for completion
432 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
434 // Validate the results
436 const Allocation& bufferAllocation = buffer.getAllocation();
437 invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
439 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
441 for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
443 const int globalOffset = groupNdx * workGroupSize;
444 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
446 const deUint32 res = bufferPtr[globalOffset + localOffset];
// Slot k was written by the invocation that got oldVal == k, storing k+1.
447 const deUint32 ref = localOffset + 1;
451 std::ostringstream msg;
452 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
453 return tcu::TestStatus::fail(msg.str());
457 return tcu::TestStatus::pass("Compute succeeded");
// Tests intra-workgroup synchronization of SSBO accesses: invocations of the
// same workgroup read-modify-write each other's (coherent) buffer slots,
// with memory barriers between the passes, so each slot accumulates
// contributions from two different neighbouring invocations.
460 class SSBOLocalBarrierTest : public vkt::TestCase
463 SSBOLocalBarrierTest (tcu::TestContext& testCtx,
464 const std::string& name,
465 const std::string& description,
466 const tcu::IVec3& localSize,
467 const tcu::IVec3& workSize);
469 void initPrograms (SourceCollections& sourceCollections) const;
470 TestInstance* createInstance (Context& context) const;
473 const tcu::IVec3 m_localSize;
474 const tcu::IVec3 m_workSize;
// Per-test-run instance: dispatch + host-side validation.
477 class SSBOLocalBarrierTestInstance : public vkt::TestInstance
480 SSBOLocalBarrierTestInstance (Context& context,
481 const tcu::IVec3& localSize,
482 const tcu::IVec3& workSize);
484 tcu::TestStatus iterate (void);
487 const tcu::IVec3 m_localSize;
488 const tcu::IVec3 m_workSize;
491 SSBOLocalBarrierTest::SSBOLocalBarrierTest (tcu::TestContext& testCtx,
492 const std::string& name,
493 const std::string& description,
494 const tcu::IVec3& localSize,
495 const tcu::IVec3& workSize)
496 : TestCase (testCtx, name, description)
497 , m_localSize (localSize)
498 , m_workSize (workSize)
// Generates the compute shader. The output SSBO is declared 'coherent' so
// writes from one invocation are visible to others in the group.
502 void SSBOLocalBarrierTest::initPrograms (SourceCollections& sourceCollections) const
504 const int workGroupSize = multiplyComponents(m_localSize);
505 const int workGroupCount = multiplyComponents(m_workSize);
506 const int numValues = workGroupSize * workGroupCount;
508 std::ostringstream src;
509 src << "#version 310 es\n"
510 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
511 << "layout(binding = 0) coherent buffer Output {\n"
512 << " uint values[" << numValues << "];\n"
514 << "void main (void) {\n"
515 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
516 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
517 << " uint globalOffs = localSize*globalNdx;\n"
518 << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
// Pass 0: initialize own slot. Then each invocation bumps its +1 and +2
// neighbours' slots (modulo group size), with memoryBarrierBuffer() between
// passes to make the prior writes visible.
// NOTE(review): barrier() calls appear to be on elided lines between the
// passes -- memoryBarrierBuffer() alone does not order execution.
520 << " sb_out.values[globalOffs + localOffs] = globalOffs;\n"
521 << " memoryBarrierBuffer();\n"
523 << " sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n" // += so we read and write
524 << " memoryBarrierBuffer();\n"
526 << " sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
529 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
532 TestInstance* SSBOLocalBarrierTest::createInstance (Context& context) const
534 return new SSBOLocalBarrierTestInstance(context, m_localSize, m_workSize);
537 SSBOLocalBarrierTestInstance::SSBOLocalBarrierTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
538 : TestInstance (context)
539 , m_localSize (localSize)
540 , m_workSize (workSize)
// Runs the test: same skeleton as the other compute tests; the reference
// value for a slot is its base value plus the indices of the two invocations
// that incremented it.
544 tcu::TestStatus SSBOLocalBarrierTestInstance::iterate (void)
546 const DeviceInterface& vk = m_context.getDeviceInterface();
547 const VkDevice device = m_context.getDevice();
548 const VkQueue queue = m_context.getUniversalQueue();
549 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
550 Allocator& allocator = m_context.getDefaultAllocator();
552 const int workGroupSize = multiplyComponents(m_localSize);
553 const int workGroupCount = multiplyComponents(m_workSize);
555 // Create a buffer and host-visible memory for it
557 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
558 const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
560 // Create descriptor set
562 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
563 DescriptorSetLayoutBuilder()
564 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
567 const Unique<VkDescriptorPool> descriptorPool(
568 DescriptorPoolBuilder()
569 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
570 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
572 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
574 const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
575 DescriptorSetUpdateBuilder()
576 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
579 // Perform the computation
581 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
582 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
583 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
// Make shader writes visible to host reads after submission.
585 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
586 const void* barriers[] = { &computeFinishBarrier };
588 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
589 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
591 // Start recording commands
593 beginCommandBuffer(vk, *cmdBuffer);
595 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
596 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
598 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
600 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriers), barriers);
602 endCommandBuffer(vk, *cmdBuffer);
604 // Wait for completion
606 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
608 // Validate the results
610 const Allocation& bufferAllocation = buffer.getAllocation();
611 invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
613 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
615 for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
617 const int globalOffset = groupNdx * workGroupSize;
618 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
620 const deUint32 res = bufferPtr[globalOffset + localOffset];
// offs0/offs1 are the local indices of the invocations whose "+1"/"+2"
// neighbour writes landed in this slot (wrap-around handled explicitly).
621 const int offs0 = localOffset - 1 < 0 ? ((localOffset + workGroupSize - 1) % workGroupSize) : ((localOffset - 1) % workGroupSize);
622 const int offs1 = localOffset - 2 < 0 ? ((localOffset + workGroupSize - 2) % workGroupSize) : ((localOffset - 2) % workGroupSize);
623 const deUint32 ref = static_cast<deUint32>(globalOffset + offs0 + offs1);
627 std::ostringstream msg;
628 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
629 return tcu::TestStatus::fail(msg.str());
633 return tcu::TestStatus::pass("Compute succeeded");
// Tests imageLoad() from a storage image: the compute shader copies an
// R32_UINT 2D image texel-by-texel into an SSBO; the host then compares the
// SSBO against the original staging data.
636 class CopyImageToSSBOTest : public vkt::TestCase
639 CopyImageToSSBOTest (tcu::TestContext& testCtx,
640 const std::string& name,
641 const std::string& description,
642 const tcu::IVec2& localSize,
643 const tcu::IVec2& imageSize);
645 void initPrograms (SourceCollections& sourceCollections) const;
646 TestInstance* createInstance (Context& context) const;
649 const tcu::IVec2 m_localSize;
650 const tcu::IVec2 m_imageSize;
// Per-test-run instance: upload image, dispatch copy shader, validate SSBO.
653 class CopyImageToSSBOTestInstance : public vkt::TestInstance
656 CopyImageToSSBOTestInstance (Context& context,
657 const tcu::IVec2& localSize,
658 const tcu::IVec2& imageSize);
660 tcu::TestStatus iterate (void);
663 const tcu::IVec2 m_localSize;
664 const tcu::IVec2 m_imageSize;
667 CopyImageToSSBOTest::CopyImageToSSBOTest (tcu::TestContext& testCtx,
668 const std::string& name,
669 const std::string& description,
670 const tcu::IVec2& localSize,
671 const tcu::IVec2& imageSize)
672 : TestCase (testCtx, name, description)
673 , m_localSize (localSize)
674 , m_imageSize (imageSize)
// Image must tile evenly into workgroups -- the dispatch size is computed as
// imageSize / localSize with no partial-group handling in the shader.
676 DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
677 DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
// Generates the copy shader: one invocation per texel, reading via imageLoad
// and writing row-major into the output SSBO.
680 void CopyImageToSSBOTest::initPrograms (SourceCollections& sourceCollections) const
682 std::ostringstream src;
683 src << "#version 310 es\n"
684 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
685 << "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_srcImg;\n"
686 << "layout(binding = 0) writeonly buffer Output {\n"
687 << " uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
689 << "void main (void) {\n"
690 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
691 << " uint value = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
692 << " sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
695 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
698 TestInstance* CopyImageToSSBOTest::createInstance (Context& context) const
700 return new CopyImageToSSBOTestInstance(context, m_localSize, m_imageSize);
703 CopyImageToSSBOTestInstance::CopyImageToSSBOTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
704 : TestInstance (context)
705 , m_localSize (localSize)
706 , m_imageSize (imageSize)
// Runs the test: uploads random data to the image through a staging buffer,
// dispatches the copy shader, and compares the output SSBO texel-for-texel
// against the staging data.
// Fix: the vkCmdCopyBufferToImage call below had its '&copyParams' argument
// corrupted into the mojibake '©Params' ('&copy' read as an HTML entity);
// restored the address-of of the copyParams region declared above.
710 tcu::TestStatus CopyImageToSSBOTestInstance::iterate (void)
712 const DeviceInterface& vk = m_context.getDeviceInterface();
713 const VkDevice device = m_context.getDevice();
714 const VkQueue queue = m_context.getUniversalQueue();
715 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
716 Allocator& allocator = m_context.getDefaultAllocator();
// Device-local source image: written by the transfer copy, read by the shader.
720 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
721 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
723 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
724 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
726 // Staging buffer (source data for image)
728 const deUint32 imageArea = multiplyComponents(m_imageSize);
729 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
731 const Buffer stagingBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
733 // Populate the staging buffer with test data
735 de::Random rnd(0xab2c7);
736 const Allocation& stagingBufferAllocation = stagingBuffer.getAllocation();
737 deUint32* bufferPtr = static_cast<deUint32*>(stagingBufferAllocation.getHostPtr());
738 for (deUint32 i = 0; i < imageArea; ++i)
739 *bufferPtr++ = rnd.getUint32();
741 flushMappedMemoryRange(vk, device, stagingBufferAllocation.getMemory(), stagingBufferAllocation.getOffset(), bufferSizeBytes);
744 // Create a buffer to store shader output
746 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
748 // Create descriptor set
750 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
751 DescriptorSetLayoutBuilder()
752 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
753 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
756 const Unique<VkDescriptorPool> descriptorPool(
757 DescriptorPoolBuilder()
758 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
759 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
760 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
762 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
// Binding 0 = output SSBO, binding 1 = source storage image (matches shader).
766 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
767 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
769 DescriptorSetUpdateBuilder()
770 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
771 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
774 // Perform the computation
776 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
777 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
778 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
// Barriers: host write -> transfer read (staging), layout UNDEFINED ->
// TRANSFER_DST (image), then TRANSFER_DST -> SHADER_READ_ONLY after the copy,
// and finally shader write -> host read on the output buffer.
780 const VkBufferMemoryBarrier stagingBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *stagingBuffer, 0ull, bufferSizeBytes);
782 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
784 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
785 *image, subresourceRange);
787 const VkImageMemoryBarrier imagePostCopyBarrier = makeImageMemoryBarrier(
788 VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
789 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
790 *image, subresourceRange);
792 const void* preCopyBarriers[] = { &stagingBufferPostHostWriteBarrier, &imagePreCopyBarrier };
793 const void* postCopyBarriers[] = { &imagePostCopyBarrier };
795 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
796 const void* postComputeBarriers[] = { &computeFinishBarrier };
798 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
799 const tcu::IVec2 workSize = m_imageSize / m_localSize;
801 // Prepare the command buffer
803 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
804 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
806 // Start recording commands
808 beginCommandBuffer(vk, *cmdBuffer);
810 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
811 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
813 vk.cmdPipelineBarrier(*cmdBuffer, 0u, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preCopyBarriers), preCopyBarriers);
814 vk.cmdCopyBufferToImage(*cmdBuffer, *stagingBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &copyParams);
815 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postCopyBarriers), postCopyBarriers);
817 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
818 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postComputeBarriers), postComputeBarriers);
820 endCommandBuffer(vk, *cmdBuffer);
822 // Wait for completion
824 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
827 // Validate the results
829 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
830 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
// Reference is simply the staging data the image was populated from.
832 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
833 const deUint32* refBufferPtr = static_cast<deUint32*>(stagingBuffer.getAllocation().getHostPtr());
835 for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
837 const deUint32 res = *(bufferPtr + ndx);
838 const deUint32 ref = *(refBufferPtr + ndx);
842 std::ostringstream msg;
843 msg << "Comparison failed for Output.values[" << ndx << "]";
844 return tcu::TestStatus::fail(msg.str());
847 return tcu::TestStatus::pass("Compute succeeded");
850 class CopySSBOToImageTest : public vkt::TestCase
// Test case: a compute shader reads random data from an SSBO and imageStore()s
// it into an r32ui storage image, one texel per invocation; the image content
// is then read back and compared against the input buffer.
// NOTE(review): access-specifier and brace lines are not visible in this
// extract; code left byte-identical, comments only.
853 CopySSBOToImageTest (tcu::TestContext& testCtx,
854 const std::string& name,
855 const std::string& description,
856 const tcu::IVec2& localSize,
857 const tcu::IVec2& imageSize);
859 void initPrograms (SourceCollections& sourceCollections) const;
860 TestInstance* createInstance (Context& context) const;
// Workgroup size (x,y) and total image size in texels; imageSize must be an
// exact multiple of localSize (asserted in the constructor).
863 const tcu::IVec2 m_localSize;
864 const tcu::IVec2 m_imageSize;
867 class CopySSBOToImageTestInstance : public vkt::TestInstance
// Runtime counterpart of CopySSBOToImageTest; all Vulkan work happens in
// iterate().
870 CopySSBOToImageTestInstance (Context& context,
871 const tcu::IVec2& localSize,
872 const tcu::IVec2& imageSize);
874 tcu::TestStatus iterate (void);
877 const tcu::IVec2 m_localSize;
878 const tcu::IVec2 m_imageSize;
// Constructor: stores the dispatch configuration and validates that the image
// divides evenly into workgroups (each invocation handles exactly one texel).
881 CopySSBOToImageTest::CopySSBOToImageTest (tcu::TestContext& testCtx,
882 const std::string& name,
883 const std::string& description,
884 const tcu::IVec2& localSize,
885 const tcu::IVec2& imageSize)
886 : TestCase (testCtx, name, description)
887 , m_localSize (localSize)
888 , m_imageSize (imageSize)
// Image extent must be a multiple of the local size so the grid covers it
// exactly with no partial workgroups.
890 DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
891 DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
// Builds the GLSL 310 es compute shader: binding 0 is the read-only input SSBO,
// binding 1 the write-only r32ui destination image; each invocation copies one
// value from the buffer (row-major) into the texel at its global ID.
// NOTE(review): the "} sb_in;" instance-name line of the Input block is not
// visible in this extract, but the body references sb_in — presumably present
// in the full file; verify before editing the shader text.
894 void CopySSBOToImageTest::initPrograms (SourceCollections& sourceCollections) const
896 std::ostringstream src;
897 src << "#version 310 es\n"
898 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
899 << "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_dstImg;\n"
900 << "layout(binding = 0) readonly buffer Input {\n"
901 << " uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
903 << "void main (void) {\n"
// stride = full grid width in invocations = row length of the linearized buffer
904 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
905 << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
906 << " imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
909 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
// Factory: hands the configured sizes to the per-run instance.
912 TestInstance* CopySSBOToImageTest::createInstance (Context& context) const
914 return new CopySSBOToImageTestInstance(context, m_localSize, m_imageSize);
// Instance constructor: just captures the dispatch parameters.
917 CopySSBOToImageTestInstance::CopySSBOToImageTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
918 : TestInstance (context)
919 , m_localSize (localSize)
920 , m_imageSize (imageSize)
// Runs the SSBO -> image test: random data is written to a host-visible SSBO,
// a compute shader imageStore()s it into an r32ui image, the image is copied
// back to a host-visible buffer and compared texel-by-texel with the input.
// Returns pass/fail as a tcu::TestStatus.
924 tcu::TestStatus CopySSBOToImageTestInstance::iterate (void)
926 const DeviceInterface& vk = m_context.getDeviceInterface();
927 const VkDevice device = m_context.getDevice();
928 const VkQueue queue = m_context.getUniversalQueue();
929 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
930 Allocator& allocator = m_context.getDefaultAllocator();
// Image is both a shader storage target (compute writes) and a transfer source
// (readback copy).
934 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
935 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
937 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
938 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
940 // Create an input buffer (data to be read in the shader)
942 const deUint32 imageArea = multiplyComponents(m_imageSize);
943 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
// BUGFIX: the second argument of makeBufferCreateInfo is a VkBufferUsageFlags;
// the original passed VK_DESCRIPTOR_TYPE_STORAGE_BUFFER (a VkDescriptorType,
// numeric value 7), which decodes to TRANSFER_SRC|TRANSFER_DST|UNIFORM_TEXEL
// usage bits instead of the storage-buffer usage this binding requires.
945 const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
947 // Populate the buffer with test data
949 de::Random rnd(0x77238ac2);
950 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
951 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
952 for (deUint32 i = 0; i < imageArea; ++i)
953 *bufferPtr++ = rnd.getUint32();
// Make the host writes visible before GPU use.
955 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
958 // Create a buffer to store shader output (copied from image data)
960 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
962 // Create descriptor set
964 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
965 DescriptorSetLayoutBuilder()
966 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
967 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
970 const Unique<VkDescriptorPool> descriptorPool(
971 DescriptorPoolBuilder()
972 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
973 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
974 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
976 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
// Binding 0 = input SSBO, binding 1 = storage image (GENERAL layout for
// shader writes), matching the shader declared in initPrograms().
980 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
981 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
983 DescriptorSetUpdateBuilder()
984 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
985 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
988 // Perform the computation
990 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
991 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
992 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
// Barriers: host write -> shader read (input buffer); UNDEFINED -> GENERAL
// layout transition (image) before the dispatch; GENERAL -> TRANSFER_SRC
// before the readback copy; transfer write -> host read after it.
994 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
996 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
998 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
999 *image, subresourceRange);
1001 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
1002 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
1003 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1004 *image, subresourceRange);
1006 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1008 const void* preComputeBarriers[] = { &inputBufferPostHostWriteBarrier, &imageLayoutBarrier };
1009 const void* preCopyBarriers[] = { &imagePreCopyBarrier };
1010 const void* postCopyBarriers[] = { &outputBufferPostCopyBarrier };
1012 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
// One workgroup per localSize-sized tile of the image.
1013 const tcu::IVec2 workSize = m_imageSize / m_localSize;
1015 // Prepare the command buffer
1017 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1018 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1020 // Start recording commands
1022 beginCommandBuffer(vk, *cmdBuffer);
1024 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1025 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1027 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preComputeBarriers), preComputeBarriers);
1028 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
1030 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preCopyBarriers), preCopyBarriers);
// BUGFIX: last argument was the mangled token "©Params" (HTML-entity
// corruption of "&copy..."); restored to &copyParams.
1031 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &copyParams);
1032 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postCopyBarriers), postCopyBarriers);
1034 endCommandBuffer(vk, *cmdBuffer);
1036 // Wait for completion
1038 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1041 // Validate the results
1043 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
// Invalidate before the host reads what the device wrote.
1044 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1046 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1047 const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
1049 for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
1051 const deUint32 res = *(bufferPtr + ndx);
1052 const deUint32 ref = *(refBufferPtr + ndx);
1056 std::ostringstream msg;
1057 msg << "Comparison failed for pixel " << ndx;
1058 return tcu::TestStatus::fail(msg.str());
1061 return tcu::TestStatus::pass("Compute succeeded");
1064 class BufferToBufferInvertTest : public vkt::TestCase
// Test case: a compute shader reads values from an input buffer (UBO or SSBO,
// selected by m_bufferType) and writes their bitwise complement into an
// output SSBO. Instances are created via the two named static factories.
1067 void initPrograms (SourceCollections& sourceCollections) const;
1068 TestInstance* createInstance (Context& context) const;
// Factory: input bound as a uniform buffer (BUFFER_TYPE_UNIFORM).
1070 static BufferToBufferInvertTest* UBOToSSBOInvertCase (tcu::TestContext& testCtx,
1071 const std::string& name,
1072 const std::string& description,
1073 const deUint32 numValues,
1074 const tcu::IVec3& localSize,
1075 const tcu::IVec3& workSize);
// Factory: input bound as a storage buffer (BUFFER_TYPE_SSBO).
1077 static BufferToBufferInvertTest* CopyInvertSSBOCase (tcu::TestContext& testCtx,
1078 const std::string& name,
1079 const std::string& description,
1080 const deUint32 numValues,
1081 const tcu::IVec3& localSize,
1082 const tcu::IVec3& workSize);
// Private constructor; use the factories above.
1085 BufferToBufferInvertTest (tcu::TestContext& testCtx,
1086 const std::string& name,
1087 const std::string& description,
1088 const deUint32 numValues,
1089 const tcu::IVec3& localSize,
1090 const tcu::IVec3& workSize,
1091 const BufferType bufferType);
1093 const BufferType m_bufferType;
1094 const deUint32 m_numValues;
1095 const tcu::IVec3 m_localSize;
1096 const tcu::IVec3 m_workSize;
1099 class BufferToBufferInvertTestInstance : public vkt::TestInstance
// Runtime counterpart of BufferToBufferInvertTest; iterate() does the work.
1102 BufferToBufferInvertTestInstance (Context& context,
1103 const deUint32 numValues,
1104 const tcu::IVec3& localSize,
1105 const tcu::IVec3& workSize,
1106 const BufferType bufferType);
1108 tcu::TestStatus iterate (void);
1111 const BufferType m_bufferType;
1112 const deUint32 m_numValues;
1113 const tcu::IVec3 m_localSize;
1114 const tcu::IVec3 m_workSize;
// Constructor: stores the configuration and validates that the value count
// divides evenly across all invocations and that the buffer type is one of
// the two supported kinds.
1117 BufferToBufferInvertTest::BufferToBufferInvertTest (tcu::TestContext& testCtx,
1118 const std::string& name,
1119 const std::string& description,
1120 const deUint32 numValues,
1121 const tcu::IVec3& localSize,
1122 const tcu::IVec3& workSize,
1123 const BufferType bufferType)
1124 : TestCase (testCtx, name, description)
1125 , m_bufferType (bufferType)
1126 , m_numValues (numValues)
1127 , m_localSize (localSize)
1128 , m_workSize (workSize)
// Each invocation handles numValues / totalInvocations values; remainder
// would be silently dropped, hence the divisibility assert.
1130 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1131 DE_ASSERT(m_bufferType == BUFFER_TYPE_UNIFORM || m_bufferType == BUFFER_TYPE_SSBO);
// Named factory for the UBO-input variant.
1134 BufferToBufferInvertTest* BufferToBufferInvertTest::UBOToSSBOInvertCase (tcu::TestContext& testCtx,
1135 const std::string& name,
1136 const std::string& description,
1137 const deUint32 numValues,
1138 const tcu::IVec3& localSize,
1139 const tcu::IVec3& workSize)
1141 return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_UNIFORM);
// Named factory for the SSBO-input variant.
1144 BufferToBufferInvertTest* BufferToBufferInvertTest::CopyInvertSSBOCase (tcu::TestContext& testCtx,
1145 const std::string& name,
1146 const std::string& description,
1147 const deUint32 numValues,
1148 const tcu::IVec3& localSize,
1149 const tcu::IVec3& workSize)
1151 return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_SSBO);
// Builds one of two GLSL 310 es shaders depending on m_bufferType: both
// bitwise-invert every input value into the output SSBO; they differ only in
// whether the input block at binding 0 is a UBO (ub_in) or an SSBO (sb_in).
// NOTE(review): the "} ub_in;"/"} sb_in;"/"} sb_out;" instance-name lines of
// the interface blocks are not visible in this extract; the bodies reference
// those names — presumably present in the full file.
1154 void BufferToBufferInvertTest::initPrograms (SourceCollections& sourceCollections) const
1156 std::ostringstream src;
1157 if (m_bufferType == BUFFER_TYPE_UNIFORM)
1159 src << "#version 310 es\n"
1160 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1161 << "layout(binding = 0) readonly uniform Input {\n"
1162 << " uint values[" << m_numValues << "];\n"
1164 << "layout(binding = 1) writeonly buffer Output {\n"
1165 << " uint values[" << m_numValues << "];\n"
1167 << "void main (void) {\n"
// Each invocation inverts a contiguous slice of numValuesPerInv values.
1168 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1169 << " uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
1170 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1171 << " uint offset = numValuesPerInv*groupNdx;\n"
1173 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1174 << " sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
1177 else if (m_bufferType == BUFFER_TYPE_SSBO)
1179 src << "#version 310 es\n"
1180 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1181 << "layout(binding = 0) readonly buffer Input {\n"
1182 << " uint values[" << m_numValues << "];\n"
1184 << "layout (binding = 1) writeonly buffer Output {\n"
1185 << " uint values[" << m_numValues << "];\n"
1187 << "void main (void) {\n"
1188 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1189 << " uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
1190 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1191 << " uint offset = numValuesPerInv*groupNdx;\n"
1193 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1194 << " sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
1198 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
// Factory: forwards the full configuration to the per-run instance.
1201 TestInstance* BufferToBufferInvertTest::createInstance (Context& context) const
1203 return new BufferToBufferInvertTestInstance(context, m_numValues, m_localSize, m_workSize, m_bufferType);
// Instance constructor: just captures the configuration.
1206 BufferToBufferInvertTestInstance::BufferToBufferInvertTestInstance (Context& context,
1207 const deUint32 numValues,
1208 const tcu::IVec3& localSize,
1209 const tcu::IVec3& workSize,
1210 const BufferType bufferType)
1211 : TestInstance (context)
1212 , m_bufferType (bufferType)
1213 , m_numValues (numValues)
1214 , m_localSize (localSize)
1215 , m_workSize (workSize)
// Runs the invert test: fills the input buffer (UBO or SSBO) with random
// values, dispatches the inverting shader, then checks that each output
// value equals the bitwise complement of the corresponding input value.
1219 tcu::TestStatus BufferToBufferInvertTestInstance::iterate (void)
1221 const DeviceInterface& vk = m_context.getDeviceInterface();
1222 const VkDevice device = m_context.getDevice();
1223 const VkQueue queue = m_context.getUniversalQueue();
1224 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1225 Allocator& allocator = m_context.getDefaultAllocator();
1227 // Customize the test based on buffer type
// UBO and SSBO variants differ in usage flags, descriptor type and RNG seed;
// everything downstream is shared.
1229 const VkBufferUsageFlags inputBufferUsageFlags = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1230 const VkDescriptorType inputBufferDescriptorType = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
1231 const deUint32 randomSeed = (m_bufferType == BUFFER_TYPE_UNIFORM ? 0x111223f : 0x124fef);
1233 // Create an input buffer
1235 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1236 const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, inputBufferUsageFlags), MemoryRequirement::HostVisible);
1238 // Fill the input buffer with data
1240 de::Random rnd(randomSeed);
1241 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
1242 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
1243 for (deUint32 i = 0; i < m_numValues; ++i)
1244 *bufferPtr++ = rnd.getUint32();
// Make host writes visible to the device.
1246 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
1249 // Create an output buffer
1251 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1253 // Create descriptor set
1255 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1256 DescriptorSetLayoutBuilder()
1257 .addSingleBinding(inputBufferDescriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1258 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1259 .build(vk, device));
1261 const Unique<VkDescriptorPool> descriptorPool(
1262 DescriptorPoolBuilder()
1263 .addType(inputBufferDescriptorType)
1264 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1265 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1267 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
// Binding 0 = input (UBO or SSBO), binding 1 = output SSBO, matching the
// shader built in initPrograms().
1269 const VkDescriptorBufferInfo inputBufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
1270 const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
1271 DescriptorSetUpdateBuilder()
1272 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), inputBufferDescriptorType, &inputBufferDescriptorInfo)
1273 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1274 .update(vk, device);
1276 // Perform the computation
1278 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1279 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1280 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
// Barriers: host write -> shader read before the dispatch, shader write ->
// host read after it.
1282 const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
1283 const void* preComputeBarriers[] = { &hostWriteBarrier };
1285 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1286 const void* postComputeBarriers[] = { &shaderWriteBarrier };
1288 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1289 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1291 // Start recording commands
1293 beginCommandBuffer(vk, *cmdBuffer);
1295 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1296 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1298 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preComputeBarriers), preComputeBarriers);
1299 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1300 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postComputeBarriers), postComputeBarriers);
1302 endCommandBuffer(vk, *cmdBuffer);
1304 // Wait for completion
1306 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1308 // Validate the results
1310 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
// Invalidate before the host reads device-written data.
1311 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1313 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1314 const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
1316 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
// Expected output: bitwise complement of the input value.
1318 const deUint32 res = bufferPtr[ndx];
1319 const deUint32 ref = ~refBufferPtr[ndx];
1323 std::ostringstream msg;
1324 msg << "Comparison failed for Output.values[" << ndx << "]";
1325 return tcu::TestStatus::fail(msg.str());
1328 return tcu::TestStatus::pass("Compute succeeded");
1331 class InvertSSBOInPlaceTest : public vkt::TestCase
// Test case: a compute shader bitwise-inverts an SSBO's contents in place
// (same buffer is read and written).
// NOTE(review): initPrograms() references an m_sized member whose declaration
// is not visible in this extract — presumably declared among the lines
// missing here; confirm against the full file.
1334 InvertSSBOInPlaceTest (tcu::TestContext& testCtx,
1335 const std::string& name,
1336 const std::string& description,
1337 const deUint32 numValues,
1339 const tcu::IVec3& localSize,
1340 const tcu::IVec3& workSize);
1343 void initPrograms (SourceCollections& sourceCollections) const;
1344 TestInstance* createInstance (Context& context) const;
1347 const deUint32 m_numValues;
1349 const tcu::IVec3 m_localSize;
1350 const tcu::IVec3 m_workSize;
1353 class InvertSSBOInPlaceTestInstance : public vkt::TestInstance
// Runtime counterpart of InvertSSBOInPlaceTest; iterate() does the work.
1356 InvertSSBOInPlaceTestInstance (Context& context,
1357 const deUint32 numValues,
1358 const tcu::IVec3& localSize,
1359 const tcu::IVec3& workSize);
1361 tcu::TestStatus iterate (void);
1364 const deUint32 m_numValues;
1365 const tcu::IVec3 m_localSize;
1366 const tcu::IVec3 m_workSize;
// Constructor: stores the configuration and validates that the value count
// divides evenly across all invocations.
1369 InvertSSBOInPlaceTest::InvertSSBOInPlaceTest (tcu::TestContext& testCtx,
1370 const std::string& name,
1371 const std::string& description,
1372 const deUint32 numValues,
1374 const tcu::IVec3& localSize,
1375 const tcu::IVec3& workSize)
1376 : TestCase (testCtx, name, description)
1377 , m_numValues (numValues)
1379 , m_localSize (localSize)
1380 , m_workSize (workSize)
1382 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
// Builds the GLSL 310 es shader: a single read/write SSBO at binding 0 whose
// values are replaced by their bitwise complement, each invocation handling a
// contiguous slice. m_sized selects a sized vs. runtime-sized (empty brackets)
// array declaration — the shader's .length() works either way.
1385 void InvertSSBOInPlaceTest::initPrograms (SourceCollections& sourceCollections) const
1387 std::ostringstream src;
1388 src << "#version 310 es\n"
1389 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1390 << "layout(binding = 0) buffer InOut {\n"
1391 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1393 << "void main (void) {\n"
1394 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1395 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
1396 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1397 << " uint offset = numValuesPerInv*groupNdx;\n"
1399 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1400 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
1403 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
// Factory: hands the configuration to the per-run instance.
1406 TestInstance* InvertSSBOInPlaceTest::createInstance (Context& context) const
1408 return new InvertSSBOInPlaceTestInstance(context, m_numValues, m_localSize, m_workSize);
// Instance constructor: just captures the configuration.
1411 InvertSSBOInPlaceTestInstance::InvertSSBOInPlaceTestInstance (Context& context,
1412 const deUint32 numValues,
1413 const tcu::IVec3& localSize,
1414 const tcu::IVec3& workSize)
1415 : TestInstance (context)
1416 , m_numValues (numValues)
1417 , m_localSize (localSize)
1418 , m_workSize (workSize)
// Runs the in-place invert test: fills one host-visible SSBO with random data
// (keeping a host-side copy in inputData), dispatches the shader that inverts
// the buffer in place, then compares each result against ~inputData[ndx].
1422 tcu::TestStatus InvertSSBOInPlaceTestInstance::iterate (void)
1424 const DeviceInterface& vk = m_context.getDeviceInterface();
1425 const VkDevice device = m_context.getDevice();
1426 const VkQueue queue = m_context.getUniversalQueue();
1427 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1428 Allocator& allocator = m_context.getDefaultAllocator();
1430 // Create an input/output buffer
1432 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1433 const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1435 // Fill the buffer with data
// A host-side copy is required because the shader overwrites the buffer;
// the original values are the comparison reference.
1437 typedef std::vector<deUint32> data_vector_t;
1438 data_vector_t inputData(m_numValues);
1441 de::Random rnd(0x82ce7f);
1442 const Allocation& bufferAllocation = buffer.getAllocation();
1443 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1444 for (deUint32 i = 0; i < m_numValues; ++i)
1445 inputData[i] = *bufferPtr++ = rnd.getUint32();
1447 flushMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1450 // Create descriptor set
1452 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1453 DescriptorSetLayoutBuilder()
1454 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1455 .build(vk, device));
1457 const Unique<VkDescriptorPool> descriptorPool(
1458 DescriptorPoolBuilder()
1459 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1460 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1462 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1464 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
1465 DescriptorSetUpdateBuilder()
1466 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
1467 .update(vk, device);
1469 // Perform the computation
1471 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1472 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1473 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
// Same buffer on both barriers: host write -> shader read before the
// dispatch, shader write -> host read after it.
1475 const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1476 const void* preComputeBarriers[] = { &hostWriteBarrier };
1478 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1479 const void* postComputeBarriers[] = { &shaderWriteBarrier };
1481 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1482 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1484 // Start recording commands
1486 beginCommandBuffer(vk, *cmdBuffer);
1488 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1489 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1491 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preComputeBarriers), preComputeBarriers);
1492 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1493 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postComputeBarriers), postComputeBarriers);
1495 endCommandBuffer(vk, *cmdBuffer);
1497 // Wait for completion
1499 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1501 // Validate the results
1503 const Allocation& bufferAllocation = buffer.getAllocation();
// Invalidate before the host reads device-written data.
1504 invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1506 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1508 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1510 const deUint32 res = bufferPtr[ndx];
1511 const deUint32 ref = ~inputData[ndx];
1515 std::ostringstream msg;
1516 msg << "Comparison failed for InOut.values[" << ndx << "]";
1517 return tcu::TestStatus::fail(msg.str());
1520 return tcu::TestStatus::pass("Compute succeeded");
1523 class WriteToMultipleSSBOTest : public vkt::TestCase
// Test case: a compute shader writes deterministic patterns into two separate
// output SSBOs (ascending indices into one, descending into the other).
// NOTE(review): initPrograms() references an m_sized member whose declaration
// is not visible in this extract — presumably declared among the lines
// missing here; confirm against the full file.
1526 WriteToMultipleSSBOTest (tcu::TestContext& testCtx,
1527 const std::string& name,
1528 const std::string& description,
1529 const deUint32 numValues,
1531 const tcu::IVec3& localSize,
1532 const tcu::IVec3& workSize);
1534 void initPrograms (SourceCollections& sourceCollections) const;
1535 TestInstance* createInstance (Context& context) const;
1538 const deUint32 m_numValues;
1540 const tcu::IVec3 m_localSize;
1541 const tcu::IVec3 m_workSize;
1544 class WriteToMultipleSSBOTestInstance : public vkt::TestInstance
// Runtime counterpart of WriteToMultipleSSBOTest; iterate() does the work.
1547 WriteToMultipleSSBOTestInstance (Context& context,
1548 const deUint32 numValues,
1549 const tcu::IVec3& localSize,
1550 const tcu::IVec3& workSize);
1552 tcu::TestStatus iterate (void);
1555 const deUint32 m_numValues;
1556 const tcu::IVec3 m_localSize;
1557 const tcu::IVec3 m_workSize;
// Constructor: stores the configuration and validates that the value count
// divides evenly across all invocations.
1560 WriteToMultipleSSBOTest::WriteToMultipleSSBOTest (tcu::TestContext& testCtx,
1561 const std::string& name,
1562 const std::string& description,
1563 const deUint32 numValues,
1565 const tcu::IVec3& localSize,
1566 const tcu::IVec3& workSize)
1567 : TestCase (testCtx, name, description)
1568 , m_numValues (numValues)
1570 , m_localSize (localSize)
1571 , m_workSize (workSize)
1573 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
// Builds the GLSL 310 es shader with two write-only SSBOs: Out0 (binding 0)
// receives values[i] = i, Out1 (binding 1) receives values[i] = length - i.
// m_sized selects a sized vs. runtime-sized array declaration; each
// invocation fills its own contiguous slice of both buffers.
// NOTE(review): the "} sb_out0;"/"} sb_out1;" instance-name lines and the
// scoping braces around the two per-buffer sections are not visible in this
// extract — presumably present in the full file (both sections redeclare
// numValuesPerInv/offset, which requires separate scopes).
1576 void WriteToMultipleSSBOTest::initPrograms (SourceCollections& sourceCollections) const
1578 std::ostringstream src;
1579 src << "#version 310 es\n"
1580 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1581 << "layout(binding = 0) writeonly buffer Out0 {\n"
1582 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1584 << "layout(binding = 1) writeonly buffer Out1 {\n"
1585 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1587 << "void main (void) {\n"
1588 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1589 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1592 << " uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
1593 << " uint offset = numValuesPerInv*groupNdx;\n"
1595 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1596 << " sb_out0.values[offset + ndx] = offset + ndx;\n"
1599 << " uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
1600 << " uint offset = numValuesPerInv*groupNdx;\n"
1602 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1603 << " sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
1607 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
// Factory: hands the configuration to the per-run instance.
1610 TestInstance* WriteToMultipleSSBOTest::createInstance (Context& context) const
1612 return new WriteToMultipleSSBOTestInstance(context, m_numValues, m_localSize, m_workSize);
// Instance constructor: just captures the configuration.
1615 WriteToMultipleSSBOTestInstance::WriteToMultipleSSBOTestInstance (Context& context,
1616 const deUint32 numValues,
1617 const tcu::IVec3& localSize,
1618 const tcu::IVec3& workSize)
1619 : TestInstance (context)
1620 , m_numValues (numValues)
1621 , m_localSize (localSize)
1622 , m_workSize (workSize)
// Runs the multi-SSBO write test: one compute dispatch writes to two storage
// buffers (sb_out0 gets ascending indices, sb_out1 gets length-minus-index
// values, per the shader built in initPrograms), then both buffers are read
// back on the host and every element is verified.
1626 tcu::TestStatus WriteToMultipleSSBOTestInstance::iterate (void)
1628 const DeviceInterface& vk = m_context.getDeviceInterface();
1629 const VkDevice device = m_context.getDevice();
1630 const VkQueue queue = m_context.getUniversalQueue();
1631 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1632 Allocator& allocator = m_context.getDefaultAllocator();
1634 // Create two output buffers
1636 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1637 const Buffer buffer0(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1638 const Buffer buffer1(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1640 // Create descriptor set
1642 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1643 DescriptorSetLayoutBuilder()
1644 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1645 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1646 .build(vk, device));
1648 const Unique<VkDescriptorPool> descriptorPool(
1649 DescriptorPoolBuilder()
1650 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1651 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1653 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1655 const VkDescriptorBufferInfo buffer0DescriptorInfo = makeDescriptorBufferInfo(*buffer0, 0ull, bufferSizeBytes);
1656 const VkDescriptorBufferInfo buffer1DescriptorInfo = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
1657 DescriptorSetUpdateBuilder()
1658 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer0DescriptorInfo)
1659 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer1DescriptorInfo)
1660 .update(vk, device);
1662 // Perform the computation
1664 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1665 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1666 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
// Both buffers are written by the shader and mapped for reading afterwards,
// so each needs a shader-write -> host-read memory barrier.
1668 const VkBufferMemoryBarrier shaderWriteBarrier0 = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer0, 0ull, bufferSizeBytes);
1669 const VkBufferMemoryBarrier shaderWriteBarrier1 = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes);
1670 const void* postComputeBarriers[] = { &shaderWriteBarrier0, &shaderWriteBarrier1 };
1672 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1673 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1675 // Start recording commands
1677 beginCommandBuffer(vk, *cmdBuffer);
1679 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1680 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1682 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1683 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postComputeBarriers), postComputeBarriers);
1685 endCommandBuffer(vk, *cmdBuffer);
1687 // Wait for completion
1689 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1691 // Validate the results
// buffer0 is expected to hold ascending indices (values[i] == i), per the
// first write loop of the shader.
1693 const Allocation& buffer0Allocation = buffer0.getAllocation();
1694 invalidateMappedMemoryRange(vk, device, buffer0Allocation.getMemory(), buffer0Allocation.getOffset(), bufferSizeBytes);
1695 const deUint32* buffer0Ptr = static_cast<deUint32*>(buffer0Allocation.getHostPtr());
1697 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1699 const deUint32 res = buffer0Ptr[ndx];
1700 const deUint32 ref = ndx;
1704 std::ostringstream msg;
1705 msg << "Comparison failed for Out0.values[" << ndx << "] res=" << res << " ref=" << ref;
1706 return tcu::TestStatus::fail(msg.str());
// buffer1 is expected to hold length-minus-index (values[i] == N - i), per
// the second write loop of the shader.
1711 const Allocation& buffer1Allocation = buffer1.getAllocation();
1712 invalidateMappedMemoryRange(vk, device, buffer1Allocation.getMemory(), buffer1Allocation.getOffset(), bufferSizeBytes);
1713 const deUint32* buffer1Ptr = static_cast<deUint32*>(buffer1Allocation.getHostPtr());
1715 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1717 const deUint32 res = buffer1Ptr[ndx];
1718 const deUint32 ref = m_numValues - ndx;
1722 std::ostringstream msg;
1723 msg << "Comparison failed for Out1.values[" << ndx << "] res=" << res << " ref=" << ref;
1724 return tcu::TestStatus::fail(msg.str());
1728 return tcu::TestStatus::pass("Compute succeeded");
// Test case: two back-to-back compute dispatches communicate through an SSBO,
// with a buffer memory barrier between them (see the instance's iterate()).
1731 class SSBOBarrierTest : public vkt::TestCase
1734 SSBOBarrierTest (tcu::TestContext& testCtx,
1735 const std::string& name,
1736 const std::string& description,
1737 const tcu::IVec3& workSize);
1739 void initPrograms (SourceCollections& sourceCollections) const;
1740 TestInstance* createInstance (Context& context) const;
// Work-group count in each dimension for both dispatches.
1743 const tcu::IVec3 m_workSize;
// Execution side of SSBOBarrierTest; all work happens in iterate().
1746 class SSBOBarrierTestInstance : public vkt::TestInstance
1749 SSBOBarrierTestInstance (Context& context,
1750 const tcu::IVec3& workSize);
1752 tcu::TestStatus iterate (void);
1755 const tcu::IVec3 m_workSize;
// Case constructor: records the work size used by both dispatches.
1758 SSBOBarrierTest::SSBOBarrierTest (tcu::TestContext& testCtx,
1759 const std::string& name,
1760 const std::string& description,
1761 const tcu::IVec3& workSize)
1762 : TestCase (testCtx, name, description)
1763 , m_workSize (workSize)
// Builds the two compute shaders:
//  - "comp0" writes (u_baseVal + workGroupIndex) into the work SSBO
//    (binding 1), one element per work group.
//  - "comp1" reads the work SSBO and atomically accumulates the values into
//    a single sum in the output SSBO (binding 0).
// The uniform block at binding 2 supplies u_baseVal.
1767 void SSBOBarrierTest::initPrograms (SourceCollections& sourceCollections) const
1769 sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
1771 "layout (local_size_x = 1) in;\n"
1772 "layout(binding = 2) readonly uniform Constants {\n"
1773 " uint u_baseVal;\n"
1775 "layout(binding = 1) writeonly buffer Output {\n"
1778 "void main (void) {\n"
1779 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1780 " values[offset] = u_baseVal + offset;\n"
1783 sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
1785 "layout (local_size_x = 1) in;\n"
1786 "layout(binding = 1) readonly buffer Input {\n"
1789 "layout(binding = 0) coherent buffer Output {\n"
1792 "void main (void) {\n"
1793 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1794 " uint value = values[offset];\n"
1795 " atomicAdd(sum, value);\n"
// Creates the per-run instance for this case.
1799 TestInstance* SSBOBarrierTest::createInstance (Context& context) const
1801 return new SSBOBarrierTestInstance(context, m_workSize);
// Instance constructor: only stores the work size; Vulkan objects are
// created in iterate().
1804 SSBOBarrierTestInstance::SSBOBarrierTestInstance (Context& context, const tcu::IVec3& workSize)
1805 : TestInstance (context)
1806 , m_workSize (workSize)
1810 tcu::TestStatus SSBOBarrierTestInstance::iterate (void)
1812 const DeviceInterface& vk = m_context.getDeviceInterface();
1813 const VkDevice device = m_context.getDevice();
1814 const VkQueue queue = m_context.getUniversalQueue();
1815 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1816 Allocator& allocator = m_context.getDefaultAllocator();
1818 // Create a work buffer used by both shaders
1820 const int workGroupCount = multiplyComponents(m_workSize);
1821 const VkDeviceSize workBufferSizeBytes = sizeof(deUint32) * workGroupCount;
1822 const Buffer workBuffer(vk, device, allocator, makeBufferCreateInfo(workBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::Any);
1824 // Create an output buffer
1826 const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
1827 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1829 // Create a uniform buffer (to pass uniform constants)
1831 const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
1832 const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
1834 // Set the constants in the uniform buffer
1836 const deUint32 baseValue = 127;
1838 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
1839 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
1840 uniformBufferPtr[0] = baseValue;
1842 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
1845 // Create descriptor set
1847 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1848 DescriptorSetLayoutBuilder()
1849 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1850 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1851 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1852 .build(vk, device));
1854 const Unique<VkDescriptorPool> descriptorPool(
1855 DescriptorPoolBuilder()
1856 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1857 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1858 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1860 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1862 const VkDescriptorBufferInfo workBufferDescriptorInfo = makeDescriptorBufferInfo(*workBuffer, 0ull, workBufferSizeBytes);
1863 const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
1864 const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
1865 DescriptorSetUpdateBuilder()
1866 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1867 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &workBufferDescriptorInfo)
1868 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
1869 .update(vk, device);
1871 // Perform the computation
1873 const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
1874 const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
1876 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1877 const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
1878 const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
1880 const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
1881 const void* barriersBeforeCompute[] = { &writeUniformConstantsBarrier };
1883 const VkBufferMemoryBarrier betweenShadersBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *workBuffer, 0ull, workBufferSizeBytes);
1884 const void* barriersAfterFirstShader[] = { &betweenShadersBarrier };
1886 const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
1887 const void* barriersAfterCompute[] = { &afterComputeBarrier };
1889 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1890 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1892 // Start recording commands
1894 beginCommandBuffer(vk, *cmdBuffer);
1896 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
1897 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1899 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriersBeforeCompute), barriersBeforeCompute);
1901 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1902 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriersAfterFirstShader), barriersAfterFirstShader);
1904 // Switch to the second shader program
1905 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
1907 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1908 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriersAfterCompute), barriersAfterCompute);
1910 endCommandBuffer(vk, *cmdBuffer);
1912 // Wait for completion
1914 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1916 // Validate the results
1918 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1919 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1921 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1922 const deUint32 res = *bufferPtr;
1925 for (int ndx = 0; ndx < workGroupCount; ++ndx)
1926 ref += baseValue + ndx;
1930 std::ostringstream msg;
1931 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
1932 return tcu::TestStatus::fail(msg.str());
1934 return tcu::TestStatus::pass("Compute succeeded");
// Test case: each work group atomically accumulates its invocations' input
// values into one pixel of an r32ui storage image (see initPrograms).
1937 class ImageAtomicOpTest : public vkt::TestCase
1940 ImageAtomicOpTest (tcu::TestContext& testCtx,
1941 const std::string& name,
1942 const std::string& description,
1943 const deUint32 localSize,
1944 const tcu::IVec2& imageSize);
1946 void initPrograms (SourceCollections& sourceCollections) const;
1947 TestInstance* createInstance (Context& context) const;
// Invocations per work group (local_size_x) and image dimensions; the
// dispatch uses one work group per pixel.
1950 const deUint32 m_localSize;
1951 const tcu::IVec2 m_imageSize;
// Execution side of ImageAtomicOpTest; all work happens in iterate().
1954 class ImageAtomicOpTestInstance : public vkt::TestInstance
1957 ImageAtomicOpTestInstance (Context& context,
1958 const deUint32 localSize,
1959 const tcu::IVec2& imageSize);
1961 tcu::TestStatus iterate (void);
1964 const deUint32 m_localSize;
1965 const tcu::IVec2 m_imageSize;
// Case constructor: records work-group size and image dimensions.
1968 ImageAtomicOpTest::ImageAtomicOpTest (tcu::TestContext& testCtx,
1969 const std::string& name,
1970 const std::string& description,
1971 const deUint32 localSize,
1972 const tcu::IVec2& imageSize)
1973 : TestCase (testCtx, name, description)
1974 , m_localSize (localSize)
1975 , m_imageSize (imageSize)
// Builds the compute shader: each invocation reads one input value from the
// SSBO at binding 0; invocation 0 of each work group clears the group's
// pixel, then (after a barrier on the elided source line) every invocation
// of the group does imageAtomicAdd into that pixel. Requires
// GL_OES_shader_image_atomic for imageAtomicAdd on ESSL 3.1.
1979 void ImageAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
1981 std::ostringstream src;
1982 src << "#version 310 es\n"
1983 << "#extension GL_OES_shader_image_atomic : require\n"
1984 << "layout (local_size_x = " << m_localSize << ") in;\n"
1985 << "layout(binding = 1, r32ui) coherent uniform highp uimage2D u_dstImg;\n"
1986 << "layout(binding = 0) readonly buffer Input {\n"
1987 << " uint values[" << (multiplyComponents(m_imageSize) * m_localSize) << "];\n"
1989 << "void main (void) {\n"
1990 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1991 << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1993 << " if (gl_LocalInvocationIndex == 0u)\n"
1994 << " imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1995 << " memoryBarrierImage();\n"
1997 << " imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
2000 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
// Creates the per-run instance for this case.
2003 TestInstance* ImageAtomicOpTest::createInstance (Context& context) const
2005 return new ImageAtomicOpTestInstance(context, m_localSize, m_imageSize);
// Instance constructor: only stores the test parameters.
2008 ImageAtomicOpTestInstance::ImageAtomicOpTestInstance (Context& context, const deUint32 localSize, const tcu::IVec2& imageSize)
2009 : TestInstance (context)
2010 , m_localSize (localSize)
2011 , m_imageSize (imageSize)
2015 tcu::TestStatus ImageAtomicOpTestInstance::iterate (void)
2017 const DeviceInterface& vk = m_context.getDeviceInterface();
2018 const VkDevice device = m_context.getDevice();
2019 const VkQueue queue = m_context.getUniversalQueue();
2020 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2021 Allocator& allocator = m_context.getDefaultAllocator();
2025 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
2026 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2028 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2029 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2033 const deUint32 numInputValues = multiplyComponents(m_imageSize) * m_localSize;
2034 const VkDeviceSize inputBufferSizeBytes = sizeof(deUint32) * numInputValues;
2036 const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(inputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
2038 // Populate the input buffer with test data
2040 de::Random rnd(0x77238ac2);
2041 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
2042 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
2043 for (deUint32 i = 0; i < numInputValues; ++i)
2044 *bufferPtr++ = rnd.getUint32();
2046 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), inputBufferSizeBytes);
2049 // Create a buffer to store shader output (copied from image data)
2051 const deUint32 imageArea = multiplyComponents(m_imageSize);
2052 const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32) * imageArea;
2053 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
2055 // Create descriptor set
2057 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2058 DescriptorSetLayoutBuilder()
2059 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2060 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2061 .build(vk, device));
2063 const Unique<VkDescriptorPool> descriptorPool(
2064 DescriptorPoolBuilder()
2065 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2066 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2067 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2069 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2073 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2074 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, inputBufferSizeBytes);
2076 DescriptorSetUpdateBuilder()
2077 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2078 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2079 .update(vk, device);
2081 // Perform the computation
2083 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
2084 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2085 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2087 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, inputBufferSizeBytes);
2089 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
2090 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
2091 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2092 *image, subresourceRange);
2094 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2096 const void* preComputeBarriers[] = { &inputBufferPostHostWriteBarrier };
2097 const void* preCopyBarriers[] = { &imagePreCopyBarrier };
2098 const void* postCopyBarriers[] = { &outputBufferPostCopyBarrier };
2100 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
2102 // Prepare the command buffer
2104 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2105 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2107 // Start recording commands
2109 beginCommandBuffer(vk, *cmdBuffer);
2111 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2112 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2114 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preComputeBarriers), preComputeBarriers);
2115 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2117 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(preCopyBarriers), preCopyBarriers);
2118 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, ©Params);
2119 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(postCopyBarriers), postCopyBarriers);
2121 endCommandBuffer(vk, *cmdBuffer);
2123 // Wait for completion
2125 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2128 // Validate the results
2130 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2131 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2133 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2134 const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
2136 for (deUint32 pixelNdx = 0; pixelNdx < imageArea; ++pixelNdx)
2138 const deUint32 res = bufferPtr[pixelNdx];
2141 for (deUint32 offs = 0; offs < m_localSize; ++offs)
2142 ref += refBufferPtr[pixelNdx * m_localSize + offs];
2146 std::ostringstream msg;
2147 msg << "Comparison failed for pixel " << pixelNdx;
2148 return tcu::TestStatus::fail(msg.str());
2151 return tcu::TestStatus::pass("Compute succeeded");
// Test case: two compute dispatches communicate through an r32ui storage
// image, with an image memory barrier between them (see iterate()).
2154 class ImageBarrierTest : public vkt::TestCase
2157 ImageBarrierTest (tcu::TestContext& testCtx,
2158 const std::string& name,
2159 const std::string& description,
2160 const tcu::IVec2& imageSize);
2162 void initPrograms (SourceCollections& sourceCollections) const;
2163 TestInstance* createInstance (Context& context) const;
// Image dimensions; both dispatches use one work group per pixel.
2166 const tcu::IVec2 m_imageSize;
// Execution side of ImageBarrierTest; all work happens in iterate().
2169 class ImageBarrierTestInstance : public vkt::TestInstance
2172 ImageBarrierTestInstance (Context& context,
2173 const tcu::IVec2& imageSize);
2175 tcu::TestStatus iterate (void);
2178 const tcu::IVec2 m_imageSize;
// Case constructor: records the image dimensions.
2181 ImageBarrierTest::ImageBarrierTest (tcu::TestContext& testCtx,
2182 const std::string& name,
2183 const std::string& description,
2184 const tcu::IVec2& imageSize)
2185 : TestCase (testCtx, name, description)
2186 , m_imageSize (imageSize)
// Builds the two compute shaders:
//  - "comp0" stores (u_baseVal + workGroupIndex) into the work group's pixel
//    of the r32ui image at binding 1.
//  - "comp1" loads each pixel and atomically accumulates the values into a
//    single sum in the SSBO at binding 0.
// The uniform block at binding 2 supplies u_baseVal.
2190 void ImageBarrierTest::initPrograms (SourceCollections& sourceCollections) const
2192 sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
2194 "layout (local_size_x = 1) in;\n"
2195 "layout(binding = 2) readonly uniform Constants {\n"
2196 " uint u_baseVal;\n"
2198 "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_img;\n"
2199 "void main (void) {\n"
2200 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
2201 " imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset + u_baseVal, 0, 0, 0));\n"
2204 sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
2206 "layout (local_size_x = 1) in;\n"
2207 "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_img;\n"
2208 "layout(binding = 0) coherent buffer Output {\n"
2211 "void main (void) {\n"
2212 " uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
2213 " atomicAdd(sum, value);\n"
// Creates the per-run instance for this case.
2217 TestInstance* ImageBarrierTest::createInstance (Context& context) const
2219 return new ImageBarrierTestInstance(context, m_imageSize);
// Instance constructor: only stores the image dimensions.
2222 ImageBarrierTestInstance::ImageBarrierTestInstance (Context& context, const tcu::IVec2& imageSize)
2223 : TestInstance (context)
2224 , m_imageSize (imageSize)
// Runs the image barrier test: the first dispatch ("comp0") writes
// (u_baseVal + workGroupIndex) into each pixel of a storage image; after an
// image memory barrier the second dispatch ("comp1") atomically sums all
// pixels into a single output value, which is read back and verified.
2228 tcu::TestStatus ImageBarrierTestInstance::iterate (void)
2230 const DeviceInterface& vk = m_context.getDeviceInterface();
2231 const VkDevice device = m_context.getDevice();
2232 const VkQueue queue = m_context.getUniversalQueue();
2233 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2234 Allocator& allocator = m_context.getDefaultAllocator();
2236 // Create an image used by both shaders
2238 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_STORAGE_BIT);
2239 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2241 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2242 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2244 // Create an output buffer
2246 const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
2247 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2249 // Create a uniform buffer (to pass uniform constants)
2251 const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
2252 const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
2254 // Set the constants in the uniform buffer
2256 const deUint32 baseValue = 127;
2258 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
2259 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
2260 uniformBufferPtr[0] = baseValue;
2262 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
2265 // Create descriptor set
2267 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2268 DescriptorSetLayoutBuilder()
2269 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2270 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2271 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2272 .build(vk, device));
2274 const Unique<VkDescriptorPool> descriptorPool(
2275 DescriptorPoolBuilder()
2276 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2277 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2278 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2279 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2281 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2283 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2284 const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
2285 const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2286 DescriptorSetUpdateBuilder()
2287 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
2288 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2289 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2290 .update(vk, device);
2292 // Perform the computation
2294 const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
2295 const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
2297 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2298 const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
2299 const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
2301 const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
// Transitions the image into GENERAL layout before the first dispatch
// (the access-mask arguments are on a source line elided from this view).
2303 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2305 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
2306 *image, subresourceRange);
2308 const void* barriersBeforeCompute[] = { &writeUniformConstantsBarrier, &imageLayoutBarrier };
2310 const VkImageMemoryBarrier imageBarrierBetweenShaders = makeImageMemoryBarrier(
2311 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
2312 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2313 *image, subresourceRange);
2315 const void* barriersAfterFirstShader[] = { &imageBarrierBetweenShaders };
2317 const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2318 const void* barriersAfterCompute[] = { &afterComputeBarrier };
2320 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2321 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2323 // Start recording commands
2325 beginCommandBuffer(vk, *cmdBuffer);
2327 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
2328 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2330 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriersBeforeCompute), barriersBeforeCompute);
2332 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2333 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriersAfterFirstShader), barriersAfterFirstShader);
2335 // Switch to the second shader program
2336 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
2338 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2339 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, DE_LENGTH_OF_ARRAY(barriersAfterCompute), barriersAfterCompute);
2341 endCommandBuffer(vk, *cmdBuffer);
2343 // Wait for completion
2345 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2347 // Validate the results
// Reference is the sum over all pixels of (baseValue + pixelIndex), matching
// what "comp0" wrote and "comp1" accumulated.
2349 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2350 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2352 const int numValues = multiplyComponents(m_imageSize);
2353 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2354 const deUint32 res = *bufferPtr;
2357 for (int ndx = 0; ndx < numValues; ++ndx)
2358 ref += baseValue + ndx;
2362 std::ostringstream msg;
2363 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
2364 return tcu::TestStatus::fail(msg.str());
2366 return tcu::TestStatus::pass("Compute succeeded");
2369 namespace EmptyShaderTest
2372 void createProgram (SourceCollections& dst)
2374 dst.glslSources.add("comp") << glu::ComputeSource(
2376 "layout (local_size_x = 1) in;\n"
2377 "void main (void) {}\n"
2381 tcu::TestStatus createTest (Context& context)
2383 const DeviceInterface& vk = context.getDeviceInterface();
2384 const VkDevice device = context.getDevice();
2385 const VkQueue queue = context.getUniversalQueue();
2386 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2388 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0u));
2390 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device));
2391 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2393 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2394 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2396 // Start recording commands
2398 beginCommandBuffer(vk, *cmdBuffer);
2400 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2402 const tcu::IVec3 workGroups(1, 1, 1);
2403 vk.cmdDispatch(*cmdBuffer, workGroups.x(), workGroups.y(), workGroups.z());
2405 endCommandBuffer(vk, *cmdBuffer);
2407 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2409 return tcu::TestStatus::pass("Compute succeeded");
2412 } // EmptyShaderTest ns
2415 tcu::TestCaseGroup* createBasicComputeShaderTests (tcu::TestContext& testCtx)
2417 de::MovePtr<tcu::TestCaseGroup> basicComputeTests(new tcu::TestCaseGroup(testCtx, "basic", "Basic compute tests"));
2419 addFunctionCaseWithPrograms(basicComputeTests.get(), "empty_shader", "Shader that does nothing", EmptyShaderTest::createProgram, EmptyShaderTest::createTest);
2421 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_single_invocation", "Copy from UBO to SSBO, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2422 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_single_group", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(2,1,4), tcu::IVec3(1,1,1)));
2423 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_multiple_invocations", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1)));
2424 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_multiple_groups", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2426 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_single_invocation", "Copy between SSBOs, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2427 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_multiple_invocations", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1)));
2428 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_multiple_groups", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2430 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_single_invocation", "Read and write same SSBO", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2431 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_multiple_groups", "Read and write same SSBO", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2432 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_single_invocation", "Read and write same SSBO", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2433 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_multiple_groups", "Read and write same SSBO", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2435 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_single_invocation", "Write to multiple SSBOs", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2436 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_multiple_groups", "Write to multiple SSBOs", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2437 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_single_invocation", "Write to multiple SSBOs", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2438 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_multiple_groups", "Write to multiple SSBOs", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2440 basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_invocation", "SSBO local barrier usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2441 basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_group", "SSBO local barrier usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1)));
2442 basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_multiple_groups", "SSBO local barrier usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3)));
2444 basicComputeTests->addChild(new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_single", "SSBO memory barrier usage", tcu::IVec3(1,1,1)));
2445 basicComputeTests->addChild(new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_multiple", "SSBO memory barrier usage", tcu::IVec3(11,5,7)));
2447 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_invocation", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2448 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_group", "Basic shared variable usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1)));
2449 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_invocations", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4)));
2450 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_groups", "Basic shared variable usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3)));
2452 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_invocation", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2453 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_group", "Atomic operation with shared var", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1)));
2454 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_invocations", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4)));
2455 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_groups", "Atomic operation with shared var", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3)));
2457 basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_small", "Image to SSBO copy", tcu::IVec2(1,1), tcu::IVec2(64,64)));
2458 basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_large", "Image to SSBO copy", tcu::IVec2(2,4), tcu::IVec2(512,512)));
2460 basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_small", "SSBO to image copy", tcu::IVec2(1, 1), tcu::IVec2(64, 64)));
2461 basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_large", "SSBO to image copy", tcu::IVec2(2, 4), tcu::IVec2(512, 512)));
2463 basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_1", "Atomic operation with image", 1, tcu::IVec2(64,64)));
2464 basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_8", "Atomic operation with image", 8, tcu::IVec2(64,64)));
2466 basicComputeTests->addChild(new ImageBarrierTest(testCtx, "image_barrier_single", "Image barrier", tcu::IVec2(1,1)));
2467 basicComputeTests->addChild(new ImageBarrierTest(testCtx, "image_barrier_multiple", "Image barrier", tcu::IVec2(64,64)));
2469 return basicComputeTests.release();