1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2015 Google Inc.
6 * Copyright (c) 2015 Mobica Ltd.
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and/or associated documentation files (the
10 * "Materials"), to deal in the Materials without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sublicense, and/or sell copies of the Materials, and to
13 * permit persons to whom the Materials are furnished to do so, subject to
14 * the following conditions:
16 * The above copyright notice(s) and this permission notice shall be included
17 * in all copies or substantial portions of the Materials.
19 * The Materials are Confidential Information as defined by the
20 * Khronos Membership Agreement until designated non-confidential by Khronos,
21 * at which point this condition clause shall be removed.
23 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
26 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
27 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
28 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
29 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
33 * \brief Compute Shader Tests
34 *//*--------------------------------------------------------------------*/
36 #include "vktComputeBasicComputeShaderTests.hpp"
37 #include "vktTestCase.hpp"
38 #include "vktTestCaseUtil.hpp"
39 #include "vktComputeTestsUtil.hpp"
43 #include "vkRefUtil.hpp"
44 #include "vkPlatform.hpp"
45 #include "vkPrograms.hpp"
46 #include "vkRefUtil.hpp"
47 #include "vkMemUtil.hpp"
48 #include "vkQueryUtil.hpp"
49 #include "vkBuilderUtil.hpp"
50 #include "vkTypeUtil.hpp"
52 #include "deStringUtil.hpp"
53 #include "deUniquePtr.hpp"
54 #include "deRandom.hpp"
67 template<typename T, int size>
68 T multiplyComponents (const tcu::Vector<T, size>& v)
71 for (int i = 0; i < size; ++i)
77 inline T squared (const T& a)
82 inline VkImageCreateInfo make2DImageCreateInfo (const tcu::IVec2& imageSize, const VkImageUsageFlags usage)
84 const VkImageCreateInfo imageParams =
86 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
87 DE_NULL, // const void* pNext;
88 0u, // VkImageCreateFlags flags;
89 VK_IMAGE_TYPE_2D, // VkImageType imageType;
90 VK_FORMAT_R32_UINT, // VkFormat format;
91 vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), // VkExtent3D extent;
92 1u, // deUint32 mipLevels;
93 1u, // deUint32 arrayLayers;
94 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
95 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
96 usage, // VkImageUsageFlags usage;
97 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
98 0u, // deUint32 queueFamilyIndexCount;
99 DE_NULL, // const deUint32* pQueueFamilyIndices;
100 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
105 inline VkBufferImageCopy makeBufferImageCopy(const tcu::IVec2& imageSize)
107 return compute::makeBufferImageCopy(vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), 1u);
116 class SharedVarTest : public vkt::TestCase
119 SharedVarTest (tcu::TestContext& testCtx,
120 const std::string& name,
121 const std::string& description,
122 const tcu::IVec3& localSize,
123 const tcu::IVec3& workSize);
125 void initPrograms (SourceCollections& sourceCollections) const;
126 TestInstance* createInstance (Context& context) const;
129 const tcu::IVec3 m_localSize;
130 const tcu::IVec3 m_workSize;
133 class SharedVarTestInstance : public vkt::TestInstance
136 SharedVarTestInstance (Context& context,
137 const tcu::IVec3& localSize,
138 const tcu::IVec3& workSize);
140 tcu::TestStatus iterate (void);
143 const tcu::IVec3 m_localSize;
144 const tcu::IVec3 m_workSize;
147 SharedVarTest::SharedVarTest (tcu::TestContext& testCtx,
148 const std::string& name,
149 const std::string& description,
150 const tcu::IVec3& localSize,
151 const tcu::IVec3& workSize)
152 : TestCase (testCtx, name, description)
153 , m_localSize (localSize)
154 , m_workSize (workSize)
158 void SharedVarTest::initPrograms (SourceCollections& sourceCollections) const
160 const int workGroupSize = multiplyComponents(m_localSize);
161 const int workGroupCount = multiplyComponents(m_workSize);
162 const int numValues = workGroupSize * workGroupCount;
164 std::ostringstream src;
165 src << "#version 310 es\n"
166 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
167 << "layout(binding = 0) writeonly buffer Output {\n"
168 << " uint values[" << numValues << "];\n"
170 << "shared uint offsets[" << workGroupSize << "];\n\n"
171 << "void main (void) {\n"
172 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
173 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
174 << " uint globalOffs = localSize*globalNdx;\n"
175 << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
177 << " offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
178 << " memoryBarrierShared();\n"
180 << " sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
183 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
186 TestInstance* SharedVarTest::createInstance (Context& context) const
188 return new SharedVarTestInstance(context, m_localSize, m_workSize);
191 SharedVarTestInstance::SharedVarTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
192 : TestInstance (context)
193 , m_localSize (localSize)
194 , m_workSize (workSize)
198 tcu::TestStatus SharedVarTestInstance::iterate (void)
200 const DeviceInterface& vk = m_context.getDeviceInterface();
201 const VkDevice device = m_context.getDevice();
202 const VkQueue queue = m_context.getUniversalQueue();
203 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
204 Allocator& allocator = m_context.getDefaultAllocator();
206 const int workGroupSize = multiplyComponents(m_localSize);
207 const int workGroupCount = multiplyComponents(m_workSize);
209 // Create a buffer and host-visible memory for it
211 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
212 const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
214 // Create descriptor set
216 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
217 DescriptorSetLayoutBuilder()
218 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
221 const Unique<VkDescriptorPool> descriptorPool(
222 DescriptorPoolBuilder()
223 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
224 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
226 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
228 const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
229 DescriptorSetUpdateBuilder()
230 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
233 // Perform the computation
235 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
236 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
237 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
239 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
241 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
242 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
244 // Start recording commands
246 beginCommandBuffer(vk, *cmdBuffer);
248 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
249 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
251 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
253 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
255 endCommandBuffer(vk, *cmdBuffer);
257 // Wait for completion
259 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
261 // Validate the results
263 const Allocation& bufferAllocation = buffer.getAllocation();
264 invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
266 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
268 for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
270 const int globalOffset = groupNdx * workGroupSize;
271 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
273 const deUint32 res = bufferPtr[globalOffset + localOffset];
274 const deUint32 ref = globalOffset + squared(workGroupSize - localOffset - 1);
278 std::ostringstream msg;
279 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
280 return tcu::TestStatus::fail(msg.str());
284 return tcu::TestStatus::pass("Compute succeeded");
287 class SharedVarAtomicOpTest : public vkt::TestCase
290 SharedVarAtomicOpTest (tcu::TestContext& testCtx,
291 const std::string& name,
292 const std::string& description,
293 const tcu::IVec3& localSize,
294 const tcu::IVec3& workSize);
296 void initPrograms (SourceCollections& sourceCollections) const;
297 TestInstance* createInstance (Context& context) const;
300 const tcu::IVec3 m_localSize;
301 const tcu::IVec3 m_workSize;
304 class SharedVarAtomicOpTestInstance : public vkt::TestInstance
307 SharedVarAtomicOpTestInstance (Context& context,
308 const tcu::IVec3& localSize,
309 const tcu::IVec3& workSize);
311 tcu::TestStatus iterate (void);
314 const tcu::IVec3 m_localSize;
315 const tcu::IVec3 m_workSize;
318 SharedVarAtomicOpTest::SharedVarAtomicOpTest (tcu::TestContext& testCtx,
319 const std::string& name,
320 const std::string& description,
321 const tcu::IVec3& localSize,
322 const tcu::IVec3& workSize)
323 : TestCase (testCtx, name, description)
324 , m_localSize (localSize)
325 , m_workSize (workSize)
329 void SharedVarAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
331 const int workGroupSize = multiplyComponents(m_localSize);
332 const int workGroupCount = multiplyComponents(m_workSize);
333 const int numValues = workGroupSize * workGroupCount;
335 std::ostringstream src;
336 src << "#version 310 es\n"
337 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
338 << "layout(binding = 0) writeonly buffer Output {\n"
339 << " uint values[" << numValues << "];\n"
341 << "shared uint count;\n\n"
342 << "void main (void) {\n"
343 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
344 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
345 << " uint globalOffs = localSize*globalNdx;\n"
348 << " memoryBarrierShared();\n"
350 << " uint oldVal = atomicAdd(count, 1u);\n"
351 << " sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
354 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
357 TestInstance* SharedVarAtomicOpTest::createInstance (Context& context) const
359 return new SharedVarAtomicOpTestInstance(context, m_localSize, m_workSize);
362 SharedVarAtomicOpTestInstance::SharedVarAtomicOpTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
363 : TestInstance (context)
364 , m_localSize (localSize)
365 , m_workSize (workSize)
369 tcu::TestStatus SharedVarAtomicOpTestInstance::iterate (void)
371 const DeviceInterface& vk = m_context.getDeviceInterface();
372 const VkDevice device = m_context.getDevice();
373 const VkQueue queue = m_context.getUniversalQueue();
374 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
375 Allocator& allocator = m_context.getDefaultAllocator();
377 const int workGroupSize = multiplyComponents(m_localSize);
378 const int workGroupCount = multiplyComponents(m_workSize);
380 // Create a buffer and host-visible memory for it
382 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
383 const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
385 // Create descriptor set
387 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
388 DescriptorSetLayoutBuilder()
389 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
392 const Unique<VkDescriptorPool> descriptorPool(
393 DescriptorPoolBuilder()
394 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
395 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
397 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
399 const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
400 DescriptorSetUpdateBuilder()
401 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
404 // Perform the computation
406 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
407 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
408 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
410 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
412 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
413 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
415 // Start recording commands
417 beginCommandBuffer(vk, *cmdBuffer);
419 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
420 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
422 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
424 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1u, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
426 endCommandBuffer(vk, *cmdBuffer);
428 // Wait for completion
430 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
432 // Validate the results
434 const Allocation& bufferAllocation = buffer.getAllocation();
435 invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
437 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
439 for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
441 const int globalOffset = groupNdx * workGroupSize;
442 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
444 const deUint32 res = bufferPtr[globalOffset + localOffset];
445 const deUint32 ref = localOffset + 1;
449 std::ostringstream msg;
450 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
451 return tcu::TestStatus::fail(msg.str());
455 return tcu::TestStatus::pass("Compute succeeded");
458 class SSBOLocalBarrierTest : public vkt::TestCase
461 SSBOLocalBarrierTest (tcu::TestContext& testCtx,
462 const std::string& name,
463 const std::string& description,
464 const tcu::IVec3& localSize,
465 const tcu::IVec3& workSize);
467 void initPrograms (SourceCollections& sourceCollections) const;
468 TestInstance* createInstance (Context& context) const;
471 const tcu::IVec3 m_localSize;
472 const tcu::IVec3 m_workSize;
475 class SSBOLocalBarrierTestInstance : public vkt::TestInstance
478 SSBOLocalBarrierTestInstance (Context& context,
479 const tcu::IVec3& localSize,
480 const tcu::IVec3& workSize);
482 tcu::TestStatus iterate (void);
485 const tcu::IVec3 m_localSize;
486 const tcu::IVec3 m_workSize;
489 SSBOLocalBarrierTest::SSBOLocalBarrierTest (tcu::TestContext& testCtx,
490 const std::string& name,
491 const std::string& description,
492 const tcu::IVec3& localSize,
493 const tcu::IVec3& workSize)
494 : TestCase (testCtx, name, description)
495 , m_localSize (localSize)
496 , m_workSize (workSize)
500 void SSBOLocalBarrierTest::initPrograms (SourceCollections& sourceCollections) const
502 const int workGroupSize = multiplyComponents(m_localSize);
503 const int workGroupCount = multiplyComponents(m_workSize);
504 const int numValues = workGroupSize * workGroupCount;
506 std::ostringstream src;
507 src << "#version 310 es\n"
508 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
509 << "layout(binding = 0) coherent buffer Output {\n"
510 << " uint values[" << numValues << "];\n"
512 << "void main (void) {\n"
513 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
514 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
515 << " uint globalOffs = localSize*globalNdx;\n"
516 << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
518 << " sb_out.values[globalOffs + localOffs] = globalOffs;\n"
519 << " memoryBarrierBuffer();\n"
521 << " sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n" // += so we read and write
522 << " memoryBarrierBuffer();\n"
524 << " sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
527 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
530 TestInstance* SSBOLocalBarrierTest::createInstance (Context& context) const
532 return new SSBOLocalBarrierTestInstance(context, m_localSize, m_workSize);
535 SSBOLocalBarrierTestInstance::SSBOLocalBarrierTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
536 : TestInstance (context)
537 , m_localSize (localSize)
538 , m_workSize (workSize)
542 tcu::TestStatus SSBOLocalBarrierTestInstance::iterate (void)
544 const DeviceInterface& vk = m_context.getDeviceInterface();
545 const VkDevice device = m_context.getDevice();
546 const VkQueue queue = m_context.getUniversalQueue();
547 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
548 Allocator& allocator = m_context.getDefaultAllocator();
550 const int workGroupSize = multiplyComponents(m_localSize);
551 const int workGroupCount = multiplyComponents(m_workSize);
553 // Create a buffer and host-visible memory for it
555 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
556 const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
558 // Create descriptor set
560 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
561 DescriptorSetLayoutBuilder()
562 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
565 const Unique<VkDescriptorPool> descriptorPool(
566 DescriptorPoolBuilder()
567 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
568 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
570 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
572 const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
573 DescriptorSetUpdateBuilder()
574 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
577 // Perform the computation
579 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
580 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
581 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
583 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
585 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
586 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
588 // Start recording commands
590 beginCommandBuffer(vk, *cmdBuffer);
592 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
593 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
595 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
597 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
599 endCommandBuffer(vk, *cmdBuffer);
601 // Wait for completion
603 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
605 // Validate the results
607 const Allocation& bufferAllocation = buffer.getAllocation();
608 invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
610 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
612 for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
614 const int globalOffset = groupNdx * workGroupSize;
615 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
617 const deUint32 res = bufferPtr[globalOffset + localOffset];
618 const int offs0 = localOffset - 1 < 0 ? ((localOffset + workGroupSize - 1) % workGroupSize) : ((localOffset - 1) % workGroupSize);
619 const int offs1 = localOffset - 2 < 0 ? ((localOffset + workGroupSize - 2) % workGroupSize) : ((localOffset - 2) % workGroupSize);
620 const deUint32 ref = static_cast<deUint32>(globalOffset + offs0 + offs1);
624 std::ostringstream msg;
625 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
626 return tcu::TestStatus::fail(msg.str());
630 return tcu::TestStatus::pass("Compute succeeded");
633 class CopyImageToSSBOTest : public vkt::TestCase
636 CopyImageToSSBOTest (tcu::TestContext& testCtx,
637 const std::string& name,
638 const std::string& description,
639 const tcu::IVec2& localSize,
640 const tcu::IVec2& imageSize);
642 void initPrograms (SourceCollections& sourceCollections) const;
643 TestInstance* createInstance (Context& context) const;
646 const tcu::IVec2 m_localSize;
647 const tcu::IVec2 m_imageSize;
650 class CopyImageToSSBOTestInstance : public vkt::TestInstance
653 CopyImageToSSBOTestInstance (Context& context,
654 const tcu::IVec2& localSize,
655 const tcu::IVec2& imageSize);
657 tcu::TestStatus iterate (void);
660 const tcu::IVec2 m_localSize;
661 const tcu::IVec2 m_imageSize;
664 CopyImageToSSBOTest::CopyImageToSSBOTest (tcu::TestContext& testCtx,
665 const std::string& name,
666 const std::string& description,
667 const tcu::IVec2& localSize,
668 const tcu::IVec2& imageSize)
669 : TestCase (testCtx, name, description)
670 , m_localSize (localSize)
671 , m_imageSize (imageSize)
673 DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
674 DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
677 void CopyImageToSSBOTest::initPrograms (SourceCollections& sourceCollections) const
679 std::ostringstream src;
680 src << "#version 310 es\n"
681 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
682 << "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_srcImg;\n"
683 << "layout(binding = 0) writeonly buffer Output {\n"
684 << " uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
686 << "void main (void) {\n"
687 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
688 << " uint value = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
689 << " sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
692 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
695 TestInstance* CopyImageToSSBOTest::createInstance (Context& context) const
697 return new CopyImageToSSBOTestInstance(context, m_localSize, m_imageSize);
700 CopyImageToSSBOTestInstance::CopyImageToSSBOTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
701 : TestInstance (context)
702 , m_localSize (localSize)
703 , m_imageSize (imageSize)
707 tcu::TestStatus CopyImageToSSBOTestInstance::iterate (void)
709 const DeviceInterface& vk = m_context.getDeviceInterface();
710 const VkDevice device = m_context.getDevice();
711 const VkQueue queue = m_context.getUniversalQueue();
712 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
713 Allocator& allocator = m_context.getDefaultAllocator();
717 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
718 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
720 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
721 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
723 // Staging buffer (source data for image)
725 const deUint32 imageArea = multiplyComponents(m_imageSize);
726 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
728 const Buffer stagingBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
730 // Populate the staging buffer with test data
732 de::Random rnd(0xab2c7);
733 const Allocation& stagingBufferAllocation = stagingBuffer.getAllocation();
734 deUint32* bufferPtr = static_cast<deUint32*>(stagingBufferAllocation.getHostPtr());
735 for (deUint32 i = 0; i < imageArea; ++i)
736 *bufferPtr++ = rnd.getUint32();
738 flushMappedMemoryRange(vk, device, stagingBufferAllocation.getMemory(), stagingBufferAllocation.getOffset(), bufferSizeBytes);
741 // Create a buffer to store shader output
743 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
745 // Create descriptor set
747 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
748 DescriptorSetLayoutBuilder()
749 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
750 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
753 const Unique<VkDescriptorPool> descriptorPool(
754 DescriptorPoolBuilder()
755 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
756 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
757 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
759 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
763 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
764 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
766 DescriptorSetUpdateBuilder()
767 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
768 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
771 // Perform the computation
773 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
774 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
775 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
777 const VkBufferMemoryBarrier stagingBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *stagingBuffer, 0ull, bufferSizeBytes);
779 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
781 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
782 *image, subresourceRange);
784 const VkImageMemoryBarrier imagePostCopyBarrier = makeImageMemoryBarrier(
785 VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
786 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
787 *image, subresourceRange);
789 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
791 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
792 const tcu::IVec2 workSize = m_imageSize / m_localSize;
794 // Prepare the command buffer
796 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
797 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
799 // Start recording commands
801 beginCommandBuffer(vk, *cmdBuffer);
803 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
804 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
806 vk.cmdPipelineBarrier(*cmdBuffer, 0u, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &stagingBufferPostHostWriteBarrier, 1, &imagePreCopyBarrier);
807 vk.cmdCopyBufferToImage(*cmdBuffer, *stagingBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, ©Params);
808 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePostCopyBarrier);
810 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
811 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
813 endCommandBuffer(vk, *cmdBuffer);
815 // Wait for completion
817 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
820 // Validate the results
822 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
823 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
825 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
826 const deUint32* refBufferPtr = static_cast<deUint32*>(stagingBuffer.getAllocation().getHostPtr());
828 for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
830 const deUint32 res = *(bufferPtr + ndx);
831 const deUint32 ref = *(refBufferPtr + ndx);
835 std::ostringstream msg;
836 msg << "Comparison failed for Output.values[" << ndx << "]";
837 return tcu::TestStatus::fail(msg.str());
840 return tcu::TestStatus::pass("Compute succeeded");
843 class CopySSBOToImageTest : public vkt::TestCase
846 CopySSBOToImageTest (tcu::TestContext& testCtx,
847 const std::string& name,
848 const std::string& description,
849 const tcu::IVec2& localSize,
850 const tcu::IVec2& imageSize);
852 void initPrograms (SourceCollections& sourceCollections) const;
853 TestInstance* createInstance (Context& context) const;
856 const tcu::IVec2 m_localSize;
857 const tcu::IVec2 m_imageSize;
860 class CopySSBOToImageTestInstance : public vkt::TestInstance
863 CopySSBOToImageTestInstance (Context& context,
864 const tcu::IVec2& localSize,
865 const tcu::IVec2& imageSize);
867 tcu::TestStatus iterate (void);
870 const tcu::IVec2 m_localSize;
871 const tcu::IVec2 m_imageSize;
874 CopySSBOToImageTest::CopySSBOToImageTest (tcu::TestContext& testCtx,
875 const std::string& name,
876 const std::string& description,
877 const tcu::IVec2& localSize,
878 const tcu::IVec2& imageSize)
879 : TestCase (testCtx, name, description)
880 , m_localSize (localSize)
881 , m_imageSize (imageSize)
883 DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
884 DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
887 void CopySSBOToImageTest::initPrograms (SourceCollections& sourceCollections) const
889 std::ostringstream src;
890 src << "#version 310 es\n"
891 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
892 << "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_dstImg;\n"
893 << "layout(binding = 0) readonly buffer Input {\n"
894 << " uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
896 << "void main (void) {\n"
897 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
898 << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
899 << " imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
902 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
905 TestInstance* CopySSBOToImageTest::createInstance (Context& context) const
907 return new CopySSBOToImageTestInstance(context, m_localSize, m_imageSize);
910 CopySSBOToImageTestInstance::CopySSBOToImageTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
911 : TestInstance (context)
912 , m_localSize (localSize)
913 , m_imageSize (imageSize)
917 tcu::TestStatus CopySSBOToImageTestInstance::iterate (void)
919 const DeviceInterface& vk = m_context.getDeviceInterface();
920 const VkDevice device = m_context.getDevice();
921 const VkQueue queue = m_context.getUniversalQueue();
922 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
923 Allocator& allocator = m_context.getDefaultAllocator();
927 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
928 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
930 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
931 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
933 // Create an input buffer (data to be read in the shader)
935 const deUint32 imageArea = multiplyComponents(m_imageSize);
936 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
938 const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
940 // Populate the buffer with test data
942 de::Random rnd(0x77238ac2);
943 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
944 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
945 for (deUint32 i = 0; i < imageArea; ++i)
946 *bufferPtr++ = rnd.getUint32();
948 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
951 // Create a buffer to store shader output (copied from image data)
953 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
955 // Create descriptor set
957 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
958 DescriptorSetLayoutBuilder()
959 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
960 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
963 const Unique<VkDescriptorPool> descriptorPool(
964 DescriptorPoolBuilder()
965 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
966 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
967 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
969 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
973 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
974 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
976 DescriptorSetUpdateBuilder()
977 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
978 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
981 // Perform the computation
983 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
984 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
985 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
987 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
989 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
991 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
992 *image, subresourceRange);
994 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
995 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
996 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
997 *image, subresourceRange);
999 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1001 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
1002 const tcu::IVec2 workSize = m_imageSize / m_localSize;
1004 // Prepare the command buffer
1006 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1007 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1009 // Start recording commands
1011 beginCommandBuffer(vk, *cmdBuffer);
1013 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1014 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1016 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
1017 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
1019 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
1020 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, ©Params);
1021 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1023 endCommandBuffer(vk, *cmdBuffer);
1025 // Wait for completion
1027 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1030 // Validate the results
1032 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1033 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1035 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1036 const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
1038 for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
1040 const deUint32 res = *(bufferPtr + ndx);
1041 const deUint32 ref = *(refBufferPtr + ndx);
1045 std::ostringstream msg;
1046 msg << "Comparison failed for pixel " << ndx;
1047 return tcu::TestStatus::fail(msg.str());
1050 return tcu::TestStatus::pass("Compute succeeded");
1053 class BufferToBufferInvertTest : public vkt::TestCase
1056 void initPrograms (SourceCollections& sourceCollections) const;
1057 TestInstance* createInstance (Context& context) const;
1059 static BufferToBufferInvertTest* UBOToSSBOInvertCase (tcu::TestContext& testCtx,
1060 const std::string& name,
1061 const std::string& description,
1062 const deUint32 numValues,
1063 const tcu::IVec3& localSize,
1064 const tcu::IVec3& workSize);
1066 static BufferToBufferInvertTest* CopyInvertSSBOCase (tcu::TestContext& testCtx,
1067 const std::string& name,
1068 const std::string& description,
1069 const deUint32 numValues,
1070 const tcu::IVec3& localSize,
1071 const tcu::IVec3& workSize);
1074 BufferToBufferInvertTest (tcu::TestContext& testCtx,
1075 const std::string& name,
1076 const std::string& description,
1077 const deUint32 numValues,
1078 const tcu::IVec3& localSize,
1079 const tcu::IVec3& workSize,
1080 const BufferType bufferType);
1082 const BufferType m_bufferType;
1083 const deUint32 m_numValues;
1084 const tcu::IVec3 m_localSize;
1085 const tcu::IVec3 m_workSize;
1088 class BufferToBufferInvertTestInstance : public vkt::TestInstance
1091 BufferToBufferInvertTestInstance (Context& context,
1092 const deUint32 numValues,
1093 const tcu::IVec3& localSize,
1094 const tcu::IVec3& workSize,
1095 const BufferType bufferType);
1097 tcu::TestStatus iterate (void);
1100 const BufferType m_bufferType;
1101 const deUint32 m_numValues;
1102 const tcu::IVec3 m_localSize;
1103 const tcu::IVec3 m_workSize;
1106 BufferToBufferInvertTest::BufferToBufferInvertTest (tcu::TestContext& testCtx,
1107 const std::string& name,
1108 const std::string& description,
1109 const deUint32 numValues,
1110 const tcu::IVec3& localSize,
1111 const tcu::IVec3& workSize,
1112 const BufferType bufferType)
1113 : TestCase (testCtx, name, description)
1114 , m_bufferType (bufferType)
1115 , m_numValues (numValues)
1116 , m_localSize (localSize)
1117 , m_workSize (workSize)
1119 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1120 DE_ASSERT(m_bufferType == BUFFER_TYPE_UNIFORM || m_bufferType == BUFFER_TYPE_SSBO);
1123 BufferToBufferInvertTest* BufferToBufferInvertTest::UBOToSSBOInvertCase (tcu::TestContext& testCtx,
1124 const std::string& name,
1125 const std::string& description,
1126 const deUint32 numValues,
1127 const tcu::IVec3& localSize,
1128 const tcu::IVec3& workSize)
1130 return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_UNIFORM);
1133 BufferToBufferInvertTest* BufferToBufferInvertTest::CopyInvertSSBOCase (tcu::TestContext& testCtx,
1134 const std::string& name,
1135 const std::string& description,
1136 const deUint32 numValues,
1137 const tcu::IVec3& localSize,
1138 const tcu::IVec3& workSize)
1140 return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_SSBO);
1143 void BufferToBufferInvertTest::initPrograms (SourceCollections& sourceCollections) const
1145 std::ostringstream src;
1146 if (m_bufferType == BUFFER_TYPE_UNIFORM)
1148 src << "#version 310 es\n"
1149 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1150 << "layout(binding = 0) readonly uniform Input {\n"
1151 << " uint values[" << m_numValues << "];\n"
1153 << "layout(binding = 1, std140) writeonly buffer Output {\n"
1154 << " uint values[" << m_numValues << "];\n"
1156 << "void main (void) {\n"
1157 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1158 << " uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
1159 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1160 << " uint offset = numValuesPerInv*groupNdx;\n"
1162 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1163 << " sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
1166 else if (m_bufferType == BUFFER_TYPE_SSBO)
1168 src << "#version 310 es\n"
1169 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1170 << "layout(binding = 0, std140) readonly buffer Input {\n"
1171 << " uint values[" << m_numValues << "];\n"
1173 << "layout (binding = 1, std140) writeonly buffer Output {\n"
1174 << " uint values[" << m_numValues << "];\n"
1176 << "void main (void) {\n"
1177 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1178 << " uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
1179 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1180 << " uint offset = numValuesPerInv*groupNdx;\n"
1182 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1183 << " sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
1187 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1190 TestInstance* BufferToBufferInvertTest::createInstance (Context& context) const
1192 return new BufferToBufferInvertTestInstance(context, m_numValues, m_localSize, m_workSize, m_bufferType);
1195 BufferToBufferInvertTestInstance::BufferToBufferInvertTestInstance (Context& context,
1196 const deUint32 numValues,
1197 const tcu::IVec3& localSize,
1198 const tcu::IVec3& workSize,
1199 const BufferType bufferType)
1200 : TestInstance (context)
1201 , m_bufferType (bufferType)
1202 , m_numValues (numValues)
1203 , m_localSize (localSize)
1204 , m_workSize (workSize)
1208 tcu::TestStatus BufferToBufferInvertTestInstance::iterate (void)
1210 const DeviceInterface& vk = m_context.getDeviceInterface();
1211 const VkDevice device = m_context.getDevice();
1212 const VkQueue queue = m_context.getUniversalQueue();
1213 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1214 Allocator& allocator = m_context.getDefaultAllocator();
1216 // Customize the test based on buffer type
1218 const VkBufferUsageFlags inputBufferUsageFlags = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1219 const VkDescriptorType inputBufferDescriptorType = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
1220 const deUint32 randomSeed = (m_bufferType == BUFFER_TYPE_UNIFORM ? 0x111223f : 0x124fef);
1222 // Create an input buffer
1224 const VkDeviceSize bufferSizeBytes = sizeof(tcu::UVec4) * m_numValues;
1225 const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, inputBufferUsageFlags), MemoryRequirement::HostVisible);
1227 // Fill the input buffer with data
1229 de::Random rnd(randomSeed);
1230 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
1231 tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(inputBufferAllocation.getHostPtr());
1232 for (deUint32 i = 0; i < m_numValues; ++i)
1233 bufferPtr[i].x() = rnd.getUint32();
1235 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
1238 // Create an output buffer
1240 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1242 // Create descriptor set
1244 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1245 DescriptorSetLayoutBuilder()
1246 .addSingleBinding(inputBufferDescriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1247 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1248 .build(vk, device));
1250 const Unique<VkDescriptorPool> descriptorPool(
1251 DescriptorPoolBuilder()
1252 .addType(inputBufferDescriptorType)
1253 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1254 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1256 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1258 const VkDescriptorBufferInfo inputBufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
1259 const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
1260 DescriptorSetUpdateBuilder()
1261 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), inputBufferDescriptorType, &inputBufferDescriptorInfo)
1262 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1263 .update(vk, device);
1265 // Perform the computation
1267 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1268 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1269 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1271 const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
1273 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1275 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1276 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1278 // Start recording commands
1280 beginCommandBuffer(vk, *cmdBuffer);
1282 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1283 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1285 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1286 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1287 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1289 endCommandBuffer(vk, *cmdBuffer);
1291 // Wait for completion
1293 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1295 // Validate the results
1297 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1298 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1300 const tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(outputBufferAllocation.getHostPtr());
1301 const tcu::UVec4* refBufferPtr = static_cast<tcu::UVec4*>(inputBuffer.getAllocation().getHostPtr());
1303 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1305 const deUint32 res = bufferPtr[ndx].x();
1306 const deUint32 ref = ~refBufferPtr[ndx].x();
1310 std::ostringstream msg;
1311 msg << "Comparison failed for Output.values[" << ndx << "]";
1312 return tcu::TestStatus::fail(msg.str());
1315 return tcu::TestStatus::pass("Compute succeeded");
1318 class InvertSSBOInPlaceTest : public vkt::TestCase
1321 InvertSSBOInPlaceTest (tcu::TestContext& testCtx,
1322 const std::string& name,
1323 const std::string& description,
1324 const deUint32 numValues,
1326 const tcu::IVec3& localSize,
1327 const tcu::IVec3& workSize);
1330 void initPrograms (SourceCollections& sourceCollections) const;
1331 TestInstance* createInstance (Context& context) const;
1334 const deUint32 m_numValues;
1336 const tcu::IVec3 m_localSize;
1337 const tcu::IVec3 m_workSize;
1340 class InvertSSBOInPlaceTestInstance : public vkt::TestInstance
1343 InvertSSBOInPlaceTestInstance (Context& context,
1344 const deUint32 numValues,
1345 const tcu::IVec3& localSize,
1346 const tcu::IVec3& workSize);
1348 tcu::TestStatus iterate (void);
1351 const deUint32 m_numValues;
1352 const tcu::IVec3 m_localSize;
1353 const tcu::IVec3 m_workSize;
1356 InvertSSBOInPlaceTest::InvertSSBOInPlaceTest (tcu::TestContext& testCtx,
1357 const std::string& name,
1358 const std::string& description,
1359 const deUint32 numValues,
1361 const tcu::IVec3& localSize,
1362 const tcu::IVec3& workSize)
1363 : TestCase (testCtx, name, description)
1364 , m_numValues (numValues)
1366 , m_localSize (localSize)
1367 , m_workSize (workSize)
1369 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1372 void InvertSSBOInPlaceTest::initPrograms (SourceCollections& sourceCollections) const
1374 std::ostringstream src;
1375 src << "#version 310 es\n"
1376 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1377 << "layout(binding = 0) buffer InOut {\n"
1378 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1380 << "void main (void) {\n"
1381 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1382 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
1383 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1384 << " uint offset = numValuesPerInv*groupNdx;\n"
1386 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1387 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
1390 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1393 TestInstance* InvertSSBOInPlaceTest::createInstance (Context& context) const
1395 return new InvertSSBOInPlaceTestInstance(context, m_numValues, m_localSize, m_workSize);
1398 InvertSSBOInPlaceTestInstance::InvertSSBOInPlaceTestInstance (Context& context,
1399 const deUint32 numValues,
1400 const tcu::IVec3& localSize,
1401 const tcu::IVec3& workSize)
1402 : TestInstance (context)
1403 , m_numValues (numValues)
1404 , m_localSize (localSize)
1405 , m_workSize (workSize)
1409 tcu::TestStatus InvertSSBOInPlaceTestInstance::iterate (void)
1411 const DeviceInterface& vk = m_context.getDeviceInterface();
1412 const VkDevice device = m_context.getDevice();
1413 const VkQueue queue = m_context.getUniversalQueue();
1414 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1415 Allocator& allocator = m_context.getDefaultAllocator();
1417 // Create an input/output buffer
1419 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1420 const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1422 // Fill the buffer with data
1424 typedef std::vector<deUint32> data_vector_t;
1425 data_vector_t inputData(m_numValues);
1428 de::Random rnd(0x82ce7f);
1429 const Allocation& bufferAllocation = buffer.getAllocation();
1430 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1431 for (deUint32 i = 0; i < m_numValues; ++i)
1432 inputData[i] = *bufferPtr++ = rnd.getUint32();
1434 flushMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1437 // Create descriptor set
1439 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1440 DescriptorSetLayoutBuilder()
1441 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1442 .build(vk, device));
1444 const Unique<VkDescriptorPool> descriptorPool(
1445 DescriptorPoolBuilder()
1446 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1447 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1449 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1451 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
1452 DescriptorSetUpdateBuilder()
1453 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
1454 .update(vk, device);
1456 // Perform the computation
1458 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1459 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1460 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1462 const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1464 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1466 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1467 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1469 // Start recording commands
1471 beginCommandBuffer(vk, *cmdBuffer);
1473 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1474 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1476 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1477 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1478 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1480 endCommandBuffer(vk, *cmdBuffer);
1482 // Wait for completion
1484 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1486 // Validate the results
1488 const Allocation& bufferAllocation = buffer.getAllocation();
1489 invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1491 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1493 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1495 const deUint32 res = bufferPtr[ndx];
1496 const deUint32 ref = ~inputData[ndx];
1500 std::ostringstream msg;
1501 msg << "Comparison failed for InOut.values[" << ndx << "]";
1502 return tcu::TestStatus::fail(msg.str());
1505 return tcu::TestStatus::pass("Compute succeeded");
1508 class WriteToMultipleSSBOTest : public vkt::TestCase
1511 WriteToMultipleSSBOTest (tcu::TestContext& testCtx,
1512 const std::string& name,
1513 const std::string& description,
1514 const deUint32 numValues,
1516 const tcu::IVec3& localSize,
1517 const tcu::IVec3& workSize);
1519 void initPrograms (SourceCollections& sourceCollections) const;
1520 TestInstance* createInstance (Context& context) const;
1523 const deUint32 m_numValues;
1525 const tcu::IVec3 m_localSize;
1526 const tcu::IVec3 m_workSize;
1529 class WriteToMultipleSSBOTestInstance : public vkt::TestInstance
1532 WriteToMultipleSSBOTestInstance (Context& context,
1533 const deUint32 numValues,
1534 const tcu::IVec3& localSize,
1535 const tcu::IVec3& workSize);
1537 tcu::TestStatus iterate (void);
1540 const deUint32 m_numValues;
1541 const tcu::IVec3 m_localSize;
1542 const tcu::IVec3 m_workSize;
1545 WriteToMultipleSSBOTest::WriteToMultipleSSBOTest (tcu::TestContext& testCtx,
1546 const std::string& name,
1547 const std::string& description,
1548 const deUint32 numValues,
1550 const tcu::IVec3& localSize,
1551 const tcu::IVec3& workSize)
1552 : TestCase (testCtx, name, description)
1553 , m_numValues (numValues)
1555 , m_localSize (localSize)
1556 , m_workSize (workSize)
1558 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1561 void WriteToMultipleSSBOTest::initPrograms (SourceCollections& sourceCollections) const
1563 std::ostringstream src;
1564 src << "#version 310 es\n"
1565 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1566 << "layout(binding = 0) writeonly buffer Out0 {\n"
1567 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1569 << "layout(binding = 1) writeonly buffer Out1 {\n"
1570 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1572 << "void main (void) {\n"
1573 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1574 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1577 << " uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
1578 << " uint offset = numValuesPerInv*groupNdx;\n"
1580 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1581 << " sb_out0.values[offset + ndx] = offset + ndx;\n"
1584 << " uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
1585 << " uint offset = numValuesPerInv*groupNdx;\n"
1587 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1588 << " sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
1592 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1595 TestInstance* WriteToMultipleSSBOTest::createInstance (Context& context) const
1597 return new WriteToMultipleSSBOTestInstance(context, m_numValues, m_localSize, m_workSize);
1600 WriteToMultipleSSBOTestInstance::WriteToMultipleSSBOTestInstance (Context& context,
1601 const deUint32 numValues,
1602 const tcu::IVec3& localSize,
1603 const tcu::IVec3& workSize)
1604 : TestInstance (context)
1605 , m_numValues (numValues)
1606 , m_localSize (localSize)
1607 , m_workSize (workSize)
1611 tcu::TestStatus WriteToMultipleSSBOTestInstance::iterate (void)
1613 const DeviceInterface& vk = m_context.getDeviceInterface();
1614 const VkDevice device = m_context.getDevice();
1615 const VkQueue queue = m_context.getUniversalQueue();
1616 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1617 Allocator& allocator = m_context.getDefaultAllocator();
1619 // Create two output buffers
1621 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1622 const Buffer buffer0(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1623 const Buffer buffer1(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1625 // Create descriptor set
1627 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1628 DescriptorSetLayoutBuilder()
1629 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1630 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1631 .build(vk, device));
1633 const Unique<VkDescriptorPool> descriptorPool(
1634 DescriptorPoolBuilder()
1635 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1636 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1638 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1640 const VkDescriptorBufferInfo buffer0DescriptorInfo = makeDescriptorBufferInfo(*buffer0, 0ull, bufferSizeBytes);
1641 const VkDescriptorBufferInfo buffer1DescriptorInfo = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
1642 DescriptorSetUpdateBuilder()
1643 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer0DescriptorInfo)
1644 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer1DescriptorInfo)
1645 .update(vk, device);
1647 // Perform the computation
1649 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1650 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1651 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1653 const VkBufferMemoryBarrier shaderWriteBarriers[] =
1655 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer0, 0ull, bufferSizeBytes),
1656 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes)
1659 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1660 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1662 // Start recording commands
1664 beginCommandBuffer(vk, *cmdBuffer);
1666 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1667 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1669 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1670 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, DE_LENGTH_OF_ARRAY(shaderWriteBarriers), shaderWriteBarriers, 0, (const VkImageMemoryBarrier*)DE_NULL);
1672 endCommandBuffer(vk, *cmdBuffer);
1674 // Wait for completion
1676 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1678 // Validate the results
1680 const Allocation& buffer0Allocation = buffer0.getAllocation();
1681 invalidateMappedMemoryRange(vk, device, buffer0Allocation.getMemory(), buffer0Allocation.getOffset(), bufferSizeBytes);
1682 const deUint32* buffer0Ptr = static_cast<deUint32*>(buffer0Allocation.getHostPtr());
1684 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1686 const deUint32 res = buffer0Ptr[ndx];
1687 const deUint32 ref = ndx;
1691 std::ostringstream msg;
1692 msg << "Comparison failed for Out0.values[" << ndx << "] res=" << res << " ref=" << ref;
1693 return tcu::TestStatus::fail(msg.str());
1698 const Allocation& buffer1Allocation = buffer1.getAllocation();
1699 invalidateMappedMemoryRange(vk, device, buffer1Allocation.getMemory(), buffer1Allocation.getOffset(), bufferSizeBytes);
1700 const deUint32* buffer1Ptr = static_cast<deUint32*>(buffer1Allocation.getHostPtr());
1702 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1704 const deUint32 res = buffer1Ptr[ndx];
1705 const deUint32 ref = m_numValues - ndx;
1709 std::ostringstream msg;
1710 msg << "Comparison failed for Out1.values[" << ndx << "] res=" << res << " ref=" << ref;
1711 return tcu::TestStatus::fail(msg.str());
1715 return tcu::TestStatus::pass("Compute succeeded");
1718 class SSBOBarrierTest : public vkt::TestCase
1721 SSBOBarrierTest (tcu::TestContext& testCtx,
1722 const std::string& name,
1723 const std::string& description,
1724 const tcu::IVec3& workSize);
1726 void initPrograms (SourceCollections& sourceCollections) const;
1727 TestInstance* createInstance (Context& context) const;
1730 const tcu::IVec3 m_workSize;
1733 class SSBOBarrierTestInstance : public vkt::TestInstance
1736 SSBOBarrierTestInstance (Context& context,
1737 const tcu::IVec3& workSize);
1739 tcu::TestStatus iterate (void);
1742 const tcu::IVec3 m_workSize;
1745 SSBOBarrierTest::SSBOBarrierTest (tcu::TestContext& testCtx,
1746 const std::string& name,
1747 const std::string& description,
1748 const tcu::IVec3& workSize)
1749 : TestCase (testCtx, name, description)
1750 , m_workSize (workSize)
1754 void SSBOBarrierTest::initPrograms (SourceCollections& sourceCollections) const
1756 sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
1758 "layout (local_size_x = 1) in;\n"
1759 "layout(binding = 2) readonly uniform Constants {\n"
1760 " uint u_baseVal;\n"
1762 "layout(binding = 1) writeonly buffer Output {\n"
1765 "void main (void) {\n"
1766 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1767 " values[offset] = u_baseVal + offset;\n"
1770 sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
1772 "layout (local_size_x = 1) in;\n"
1773 "layout(binding = 1) readonly buffer Input {\n"
1776 "layout(binding = 0) coherent buffer Output {\n"
1779 "void main (void) {\n"
1780 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1781 " uint value = values[offset];\n"
1782 " atomicAdd(sum, value);\n"
1786 TestInstance* SSBOBarrierTest::createInstance (Context& context) const
1788 return new SSBOBarrierTestInstance(context, m_workSize);
1791 SSBOBarrierTestInstance::SSBOBarrierTestInstance (Context& context, const tcu::IVec3& workSize)
1792 : TestInstance (context)
1793 , m_workSize (workSize)
1797 tcu::TestStatus SSBOBarrierTestInstance::iterate (void)
1799 const DeviceInterface& vk = m_context.getDeviceInterface();
1800 const VkDevice device = m_context.getDevice();
1801 const VkQueue queue = m_context.getUniversalQueue();
1802 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1803 Allocator& allocator = m_context.getDefaultAllocator();
1805 // Create a work buffer used by both shaders
1807 const int workGroupCount = multiplyComponents(m_workSize);
1808 const VkDeviceSize workBufferSizeBytes = sizeof(deUint32) * workGroupCount;
1809 const Buffer workBuffer(vk, device, allocator, makeBufferCreateInfo(workBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::Any);
1811 // Create an output buffer
1813 const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
1814 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1816 // Initialize atomic counter value to zero
1818 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1819 deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1820 *outputBufferPtr = 0;
1821 flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1824 // Create a uniform buffer (to pass uniform constants)
1826 const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
1827 const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
1829 // Set the constants in the uniform buffer
1831 const deUint32 baseValue = 127;
1833 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
1834 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
1835 uniformBufferPtr[0] = baseValue;
1837 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
1840 // Create descriptor set
1842 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1843 DescriptorSetLayoutBuilder()
1844 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1845 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1846 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1847 .build(vk, device));
1849 const Unique<VkDescriptorPool> descriptorPool(
1850 DescriptorPoolBuilder()
1851 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1852 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1853 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1855 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1857 const VkDescriptorBufferInfo workBufferDescriptorInfo = makeDescriptorBufferInfo(*workBuffer, 0ull, workBufferSizeBytes);
1858 const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
1859 const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
1860 DescriptorSetUpdateBuilder()
1861 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1862 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &workBufferDescriptorInfo)
1863 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
1864 .update(vk, device);
1866 // Perform the computation
1868 const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
1869 const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
1871 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1872 const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
1873 const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
1875 const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
1877 const VkBufferMemoryBarrier betweenShadersBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *workBuffer, 0ull, workBufferSizeBytes);
1879 const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
1881 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1882 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1884 // Start recording commands
1886 beginCommandBuffer(vk, *cmdBuffer);
1888 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
1889 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1891 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1893 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1894 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &betweenShadersBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1896 // Switch to the second shader program
1897 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
1899 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1900 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1902 endCommandBuffer(vk, *cmdBuffer);
1904 // Wait for completion
1906 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1908 // Validate the results
1910 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1911 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1913 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1914 const deUint32 res = *bufferPtr;
1917 for (int ndx = 0; ndx < workGroupCount; ++ndx)
1918 ref += baseValue + ndx;
1922 std::ostringstream msg;
1923 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
1924 return tcu::TestStatus::fail(msg.str());
1926 return tcu::TestStatus::pass("Compute succeeded");
1929 class ImageAtomicOpTest : public vkt::TestCase
1932 ImageAtomicOpTest (tcu::TestContext& testCtx,
1933 const std::string& name,
1934 const std::string& description,
1935 const deUint32 localSize,
1936 const tcu::IVec2& imageSize);
1938 void initPrograms (SourceCollections& sourceCollections) const;
1939 TestInstance* createInstance (Context& context) const;
1942 const deUint32 m_localSize;
1943 const tcu::IVec2 m_imageSize;
1946 class ImageAtomicOpTestInstance : public vkt::TestInstance
1949 ImageAtomicOpTestInstance (Context& context,
1950 const deUint32 localSize,
1951 const tcu::IVec2& imageSize);
1953 tcu::TestStatus iterate (void);
1956 const deUint32 m_localSize;
1957 const tcu::IVec2 m_imageSize;
1960 ImageAtomicOpTest::ImageAtomicOpTest (tcu::TestContext& testCtx,
1961 const std::string& name,
1962 const std::string& description,
1963 const deUint32 localSize,
1964 const tcu::IVec2& imageSize)
1965 : TestCase (testCtx, name, description)
1966 , m_localSize (localSize)
1967 , m_imageSize (imageSize)
1971 void ImageAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
1973 std::ostringstream src;
1974 src << "#version 310 es\n"
1975 << "#extension GL_OES_shader_image_atomic : require\n"
1976 << "layout (local_size_x = " << m_localSize << ") in;\n"
1977 << "layout(binding = 1, r32ui) coherent uniform highp uimage2D u_dstImg;\n"
1978 << "layout(binding = 0) readonly buffer Input {\n"
1979 << " uint values[" << (multiplyComponents(m_imageSize) * m_localSize) << "];\n"
1981 << "void main (void) {\n"
1982 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1983 << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1985 << " if (gl_LocalInvocationIndex == 0u)\n"
1986 << " imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1987 << " memoryBarrierImage();\n"
1989 << " imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
1992 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1995 TestInstance* ImageAtomicOpTest::createInstance (Context& context) const
1997 return new ImageAtomicOpTestInstance(context, m_localSize, m_imageSize);
2000 ImageAtomicOpTestInstance::ImageAtomicOpTestInstance (Context& context, const deUint32 localSize, const tcu::IVec2& imageSize)
2001 : TestInstance (context)
2002 , m_localSize (localSize)
2003 , m_imageSize (imageSize)
2007 tcu::TestStatus ImageAtomicOpTestInstance::iterate (void)
2009 const DeviceInterface& vk = m_context.getDeviceInterface();
2010 const VkDevice device = m_context.getDevice();
2011 const VkQueue queue = m_context.getUniversalQueue();
2012 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2013 Allocator& allocator = m_context.getDefaultAllocator();
2017 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
2018 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2020 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2021 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2025 const deUint32 numInputValues = multiplyComponents(m_imageSize) * m_localSize;
2026 const VkDeviceSize inputBufferSizeBytes = sizeof(deUint32) * numInputValues;
2028 const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(inputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
2030 // Populate the input buffer with test data
2032 de::Random rnd(0x77238ac2);
2033 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
2034 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
2035 for (deUint32 i = 0; i < numInputValues; ++i)
2036 *bufferPtr++ = rnd.getUint32();
2038 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), inputBufferSizeBytes);
2041 // Create a buffer to store shader output (copied from image data)
2043 const deUint32 imageArea = multiplyComponents(m_imageSize);
2044 const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32) * imageArea;
2045 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
2047 // Create descriptor set
2049 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2050 DescriptorSetLayoutBuilder()
2051 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2052 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2053 .build(vk, device));
2055 const Unique<VkDescriptorPool> descriptorPool(
2056 DescriptorPoolBuilder()
2057 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2058 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2059 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2061 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2065 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2066 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, inputBufferSizeBytes);
2068 DescriptorSetUpdateBuilder()
2069 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2070 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2071 .update(vk, device);
2073 // Perform the computation
2075 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
2076 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2077 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2079 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, inputBufferSizeBytes);
2081 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
2082 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
2083 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2084 *image, subresourceRange);
2086 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2088 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
2090 // Prepare the command buffer
2092 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2093 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2095 // Start recording commands
2097 beginCommandBuffer(vk, *cmdBuffer);
2099 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2100 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2102 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2103 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2105 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
2106 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, ©Params);
2107 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2109 endCommandBuffer(vk, *cmdBuffer);
2111 // Wait for completion
2113 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2116 // Validate the results
2118 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2119 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2121 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2122 const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
2124 for (deUint32 pixelNdx = 0; pixelNdx < imageArea; ++pixelNdx)
2126 const deUint32 res = bufferPtr[pixelNdx];
2129 for (deUint32 offs = 0; offs < m_localSize; ++offs)
2130 ref += refBufferPtr[pixelNdx * m_localSize + offs];
2134 std::ostringstream msg;
2135 msg << "Comparison failed for pixel " << pixelNdx;
2136 return tcu::TestStatus::fail(msg.str());
2139 return tcu::TestStatus::pass("Compute succeeded");
2142 class ImageBarrierTest : public vkt::TestCase
2145 ImageBarrierTest (tcu::TestContext& testCtx,
2146 const std::string& name,
2147 const std::string& description,
2148 const tcu::IVec2& imageSize);
2150 void initPrograms (SourceCollections& sourceCollections) const;
2151 TestInstance* createInstance (Context& context) const;
2154 const tcu::IVec2 m_imageSize;
2157 class ImageBarrierTestInstance : public vkt::TestInstance
2160 ImageBarrierTestInstance (Context& context,
2161 const tcu::IVec2& imageSize);
2163 tcu::TestStatus iterate (void);
2166 const tcu::IVec2 m_imageSize;
2169 ImageBarrierTest::ImageBarrierTest (tcu::TestContext& testCtx,
2170 const std::string& name,
2171 const std::string& description,
2172 const tcu::IVec2& imageSize)
2173 : TestCase (testCtx, name, description)
2174 , m_imageSize (imageSize)
2178 void ImageBarrierTest::initPrograms (SourceCollections& sourceCollections) const
2180 sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
2182 "layout (local_size_x = 1) in;\n"
2183 "layout(binding = 2) readonly uniform Constants {\n"
2184 " uint u_baseVal;\n"
2186 "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_img;\n"
2187 "void main (void) {\n"
2188 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
2189 " imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset + u_baseVal, 0, 0, 0));\n"
2192 sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
2194 "layout (local_size_x = 1) in;\n"
2195 "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_img;\n"
2196 "layout(binding = 0) coherent buffer Output {\n"
2199 "void main (void) {\n"
2200 " uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
2201 " atomicAdd(sum, value);\n"
2205 TestInstance* ImageBarrierTest::createInstance (Context& context) const
2207 return new ImageBarrierTestInstance(context, m_imageSize);
2210 ImageBarrierTestInstance::ImageBarrierTestInstance (Context& context, const tcu::IVec2& imageSize)
2211 : TestInstance (context)
2212 , m_imageSize (imageSize)
2216 tcu::TestStatus ImageBarrierTestInstance::iterate (void)
2218 const DeviceInterface& vk = m_context.getDeviceInterface();
2219 const VkDevice device = m_context.getDevice();
2220 const VkQueue queue = m_context.getUniversalQueue();
2221 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2222 Allocator& allocator = m_context.getDefaultAllocator();
2224 // Create an image used by both shaders
2226 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_STORAGE_BIT);
2227 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2229 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2230 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2232 // Create an output buffer
2234 const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
2235 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2237 // Initialize atomic counter value to zero
2239 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2240 deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2241 *outputBufferPtr = 0;
2242 flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2245 // Create a uniform buffer (to pass uniform constants)
2247 const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
2248 const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
2250 // Set the constants in the uniform buffer
2252 const deUint32 baseValue = 127;
2254 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
2255 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
2256 uniformBufferPtr[0] = baseValue;
2258 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
2261 // Create descriptor set
2263 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2264 DescriptorSetLayoutBuilder()
2265 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2266 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2267 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2268 .build(vk, device));
2270 const Unique<VkDescriptorPool> descriptorPool(
2271 DescriptorPoolBuilder()
2272 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2273 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2274 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2275 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2277 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2279 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2280 const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
2281 const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2282 DescriptorSetUpdateBuilder()
2283 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
2284 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2285 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2286 .update(vk, device);
2288 // Perform the computation
2290 const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
2291 const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
2293 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2294 const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
2295 const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
2297 const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
2299 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2301 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
2302 *image, subresourceRange);
2304 const VkImageMemoryBarrier imageBarrierBetweenShaders = makeImageMemoryBarrier(
2305 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
2306 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2307 *image, subresourceRange);
2309 const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2311 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2312 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2314 // Start recording commands
2316 beginCommandBuffer(vk, *cmdBuffer);
2318 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
2319 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2321 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 1, &imageLayoutBarrier);
2323 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2324 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imageBarrierBetweenShaders);
2326 // Switch to the second shader program
2327 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
2329 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2330 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2332 endCommandBuffer(vk, *cmdBuffer);
2334 // Wait for completion
2336 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2338 // Validate the results
2340 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2341 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2343 const int numValues = multiplyComponents(m_imageSize);
2344 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2345 const deUint32 res = *bufferPtr;
2348 for (int ndx = 0; ndx < numValues; ++ndx)
2349 ref += baseValue + ndx;
2353 std::ostringstream msg;
2354 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
2355 return tcu::TestStatus::fail(msg.str());
2357 return tcu::TestStatus::pass("Compute succeeded");
2360 namespace EmptyShaderTest
2363 void createProgram (SourceCollections& dst)
2365 dst.glslSources.add("comp") << glu::ComputeSource(
2367 "layout (local_size_x = 1) in;\n"
2368 "void main (void) {}\n"
2372 tcu::TestStatus createTest (Context& context)
2374 const DeviceInterface& vk = context.getDeviceInterface();
2375 const VkDevice device = context.getDevice();
2376 const VkQueue queue = context.getUniversalQueue();
2377 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2379 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0u));
2381 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device));
2382 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2384 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2385 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2387 // Start recording commands
2389 beginCommandBuffer(vk, *cmdBuffer);
2391 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2393 const tcu::IVec3 workGroups(1, 1, 1);
2394 vk.cmdDispatch(*cmdBuffer, workGroups.x(), workGroups.y(), workGroups.z());
2396 endCommandBuffer(vk, *cmdBuffer);
2398 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2400 return tcu::TestStatus::pass("Compute succeeded");
2403 } // EmptyShaderTest ns
2406 tcu::TestCaseGroup* createBasicComputeShaderTests (tcu::TestContext& testCtx)
2408 de::MovePtr<tcu::TestCaseGroup> basicComputeTests(new tcu::TestCaseGroup(testCtx, "basic", "Basic compute tests"));
2410 addFunctionCaseWithPrograms(basicComputeTests.get(), "empty_shader", "Shader that does nothing", EmptyShaderTest::createProgram, EmptyShaderTest::createTest);
2412 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_single_invocation", "Copy from UBO to SSBO, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2413 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_single_group", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(2,1,4), tcu::IVec3(1,1,1)));
2414 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_multiple_invocations", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1)));
2415 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_multiple_groups", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2417 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_single_invocation", "Copy between SSBOs, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2418 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_multiple_invocations", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1)));
2419 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_multiple_groups", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2421 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_single_invocation", "Read and write same SSBO", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2422 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_multiple_groups", "Read and write same SSBO", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2423 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_single_invocation", "Read and write same SSBO", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2424 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_multiple_groups", "Read and write same SSBO", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2426 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_single_invocation", "Write to multiple SSBOs", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2427 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_multiple_groups", "Write to multiple SSBOs", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2428 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_single_invocation", "Write to multiple SSBOs", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2429 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_multiple_groups", "Write to multiple SSBOs", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
2431 basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_invocation", "SSBO local barrier usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2432 basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_group", "SSBO local barrier usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1)));
2433 basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_multiple_groups", "SSBO local barrier usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3)));
2435 basicComputeTests->addChild(new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_single", "SSBO memory barrier usage", tcu::IVec3(1,1,1)));
2436 basicComputeTests->addChild(new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_multiple", "SSBO memory barrier usage", tcu::IVec3(11,5,7)));
2438 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_invocation", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2439 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_group", "Basic shared variable usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1)));
2440 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_invocations", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4)));
2441 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_groups", "Basic shared variable usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3)));
2443 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_invocation", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
2444 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_group", "Atomic operation with shared var", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1)));
2445 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_invocations", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4)));
2446 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_groups", "Atomic operation with shared var", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3)));
2448 basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_small", "Image to SSBO copy", tcu::IVec2(1,1), tcu::IVec2(64,64)));
2449 basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_large", "Image to SSBO copy", tcu::IVec2(2,4), tcu::IVec2(512,512)));
2451 basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_small", "SSBO to image copy", tcu::IVec2(1, 1), tcu::IVec2(64, 64)));
2452 basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_large", "SSBO to image copy", tcu::IVec2(2, 4), tcu::IVec2(512, 512)));
2454 basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_1", "Atomic operation with image", 1, tcu::IVec2(64,64)));
2455 basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_8", "Atomic operation with image", 8, tcu::IVec2(64,64)));
2457 basicComputeTests->addChild(new ImageBarrierTest(testCtx, "image_barrier_single", "Image barrier", tcu::IVec2(1,1)));
2458 basicComputeTests->addChild(new ImageBarrierTest(testCtx, "image_barrier_multiple", "Image barrier", tcu::IVec2(64,64)));
2460 return basicComputeTests.release();