1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2016 The Khronos Group Inc.
6 * Copyright (c) 2016 The Android Open Source Project
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and/or associated documentation files (the
10 * "Materials"), to deal in the Materials without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sublicense, and/or sell copies of the Materials, and to
13 * permit persons to whom the Materials are furnished to do so, subject to
14 * the following conditions:
16 * The above copyright notice(s) and this permission notice shall be included
17 * in all copies or substantial portions of the Materials.
19 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
29 * \brief Compute Shader Tests
30 *//*--------------------------------------------------------------------*/
32 #include "vktComputeBasicComputeShaderTests.hpp"
33 #include "vktTestCase.hpp"
34 #include "vktTestCaseUtil.hpp"
35 #include "vktComputeTestsUtil.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkPlatform.hpp"
41 #include "vkPrograms.hpp"
42 #include "vkRefUtil.hpp"
43 #include "vkMemUtil.hpp"
44 #include "vkQueryUtil.hpp"
45 #include "vkBuilderUtil.hpp"
46 #include "vkTypeUtil.hpp"
48 #include "deStringUtil.hpp"
49 #include "deUniquePtr.hpp"
50 #include "deRandom.hpp"
63 template<typename T, int size>
64 T multiplyComponents (const tcu::Vector<T, size>& v)
67 for (int i = 0; i < size; ++i)
//! Square of a value; used when computing reference values for shader output.
template<typename T>
inline T squared (const T& a)
{
	return a * a;
}
78 inline VkImageCreateInfo make2DImageCreateInfo (const tcu::IVec2& imageSize, const VkImageUsageFlags usage)
80 const VkImageCreateInfo imageParams =
82 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
83 DE_NULL, // const void* pNext;
84 0u, // VkImageCreateFlags flags;
85 VK_IMAGE_TYPE_2D, // VkImageType imageType;
86 VK_FORMAT_R32_UINT, // VkFormat format;
87 vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), // VkExtent3D extent;
88 1u, // deUint32 mipLevels;
89 1u, // deUint32 arrayLayers;
90 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
91 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
92 usage, // VkImageUsageFlags usage;
93 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
94 0u, // deUint32 queueFamilyIndexCount;
95 DE_NULL, // const deUint32* pQueueFamilyIndices;
96 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
101 inline VkBufferImageCopy makeBufferImageCopy(const tcu::IVec2& imageSize)
103 return compute::makeBufferImageCopy(vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), 1u);
112 class SharedVarTest : public vkt::TestCase
115 SharedVarTest (tcu::TestContext& testCtx,
116 const std::string& name,
117 const std::string& description,
118 const tcu::IVec3& localSize,
119 const tcu::IVec3& workSize);
121 void initPrograms (SourceCollections& sourceCollections) const;
122 TestInstance* createInstance (Context& context) const;
125 const tcu::IVec3 m_localSize;
126 const tcu::IVec3 m_workSize;
129 class SharedVarTestInstance : public vkt::TestInstance
132 SharedVarTestInstance (Context& context,
133 const tcu::IVec3& localSize,
134 const tcu::IVec3& workSize);
136 tcu::TestStatus iterate (void);
139 const tcu::IVec3 m_localSize;
140 const tcu::IVec3 m_workSize;
143 SharedVarTest::SharedVarTest (tcu::TestContext& testCtx,
144 const std::string& name,
145 const std::string& description,
146 const tcu::IVec3& localSize,
147 const tcu::IVec3& workSize)
148 : TestCase (testCtx, name, description)
149 , m_localSize (localSize)
150 , m_workSize (workSize)
154 void SharedVarTest::initPrograms (SourceCollections& sourceCollections) const
156 const int workGroupSize = multiplyComponents(m_localSize);
157 const int workGroupCount = multiplyComponents(m_workSize);
158 const int numValues = workGroupSize * workGroupCount;
160 std::ostringstream src;
161 src << "#version 310 es\n"
162 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
163 << "layout(binding = 0) writeonly buffer Output {\n"
164 << " uint values[" << numValues << "];\n"
166 << "shared uint offsets[" << workGroupSize << "];\n\n"
167 << "void main (void) {\n"
168 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
169 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
170 << " uint globalOffs = localSize*globalNdx;\n"
171 << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
173 << " offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
174 << " memoryBarrierShared();\n"
176 << " sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
179 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
182 TestInstance* SharedVarTest::createInstance (Context& context) const
184 return new SharedVarTestInstance(context, m_localSize, m_workSize);
187 SharedVarTestInstance::SharedVarTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
188 : TestInstance (context)
189 , m_localSize (localSize)
190 , m_workSize (workSize)
194 tcu::TestStatus SharedVarTestInstance::iterate (void)
196 const DeviceInterface& vk = m_context.getDeviceInterface();
197 const VkDevice device = m_context.getDevice();
198 const VkQueue queue = m_context.getUniversalQueue();
199 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
200 Allocator& allocator = m_context.getDefaultAllocator();
202 const int workGroupSize = multiplyComponents(m_localSize);
203 const int workGroupCount = multiplyComponents(m_workSize);
205 // Create a buffer and host-visible memory for it
207 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
208 const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
210 // Create descriptor set
212 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
213 DescriptorSetLayoutBuilder()
214 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
217 const Unique<VkDescriptorPool> descriptorPool(
218 DescriptorPoolBuilder()
219 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
220 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
222 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
224 const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
225 DescriptorSetUpdateBuilder()
226 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
229 // Perform the computation
231 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
232 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
233 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
235 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
237 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
238 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
240 // Start recording commands
242 beginCommandBuffer(vk, *cmdBuffer);
244 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
245 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
247 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
249 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
251 endCommandBuffer(vk, *cmdBuffer);
253 // Wait for completion
255 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
257 // Validate the results
259 const Allocation& bufferAllocation = buffer.getAllocation();
260 invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
262 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
264 for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
266 const int globalOffset = groupNdx * workGroupSize;
267 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
269 const deUint32 res = bufferPtr[globalOffset + localOffset];
270 const deUint32 ref = globalOffset + squared(workGroupSize - localOffset - 1);
274 std::ostringstream msg;
275 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
276 return tcu::TestStatus::fail(msg.str());
280 return tcu::TestStatus::pass("Compute succeeded");
283 class SharedVarAtomicOpTest : public vkt::TestCase
286 SharedVarAtomicOpTest (tcu::TestContext& testCtx,
287 const std::string& name,
288 const std::string& description,
289 const tcu::IVec3& localSize,
290 const tcu::IVec3& workSize);
292 void initPrograms (SourceCollections& sourceCollections) const;
293 TestInstance* createInstance (Context& context) const;
296 const tcu::IVec3 m_localSize;
297 const tcu::IVec3 m_workSize;
300 class SharedVarAtomicOpTestInstance : public vkt::TestInstance
303 SharedVarAtomicOpTestInstance (Context& context,
304 const tcu::IVec3& localSize,
305 const tcu::IVec3& workSize);
307 tcu::TestStatus iterate (void);
310 const tcu::IVec3 m_localSize;
311 const tcu::IVec3 m_workSize;
314 SharedVarAtomicOpTest::SharedVarAtomicOpTest (tcu::TestContext& testCtx,
315 const std::string& name,
316 const std::string& description,
317 const tcu::IVec3& localSize,
318 const tcu::IVec3& workSize)
319 : TestCase (testCtx, name, description)
320 , m_localSize (localSize)
321 , m_workSize (workSize)
325 void SharedVarAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
327 const int workGroupSize = multiplyComponents(m_localSize);
328 const int workGroupCount = multiplyComponents(m_workSize);
329 const int numValues = workGroupSize * workGroupCount;
331 std::ostringstream src;
332 src << "#version 310 es\n"
333 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
334 << "layout(binding = 0) writeonly buffer Output {\n"
335 << " uint values[" << numValues << "];\n"
337 << "shared uint count;\n\n"
338 << "void main (void) {\n"
339 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
340 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
341 << " uint globalOffs = localSize*globalNdx;\n"
344 << " memoryBarrierShared();\n"
346 << " uint oldVal = atomicAdd(count, 1u);\n"
347 << " sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
350 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
353 TestInstance* SharedVarAtomicOpTest::createInstance (Context& context) const
355 return new SharedVarAtomicOpTestInstance(context, m_localSize, m_workSize);
358 SharedVarAtomicOpTestInstance::SharedVarAtomicOpTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
359 : TestInstance (context)
360 , m_localSize (localSize)
361 , m_workSize (workSize)
365 tcu::TestStatus SharedVarAtomicOpTestInstance::iterate (void)
367 const DeviceInterface& vk = m_context.getDeviceInterface();
368 const VkDevice device = m_context.getDevice();
369 const VkQueue queue = m_context.getUniversalQueue();
370 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
371 Allocator& allocator = m_context.getDefaultAllocator();
373 const int workGroupSize = multiplyComponents(m_localSize);
374 const int workGroupCount = multiplyComponents(m_workSize);
376 // Create a buffer and host-visible memory for it
378 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
379 const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
381 // Create descriptor set
383 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
384 DescriptorSetLayoutBuilder()
385 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
388 const Unique<VkDescriptorPool> descriptorPool(
389 DescriptorPoolBuilder()
390 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
391 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
393 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
395 const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
396 DescriptorSetUpdateBuilder()
397 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
400 // Perform the computation
402 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
403 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
404 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
406 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
408 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
409 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
411 // Start recording commands
413 beginCommandBuffer(vk, *cmdBuffer);
415 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
416 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
418 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
420 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1u, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
422 endCommandBuffer(vk, *cmdBuffer);
424 // Wait for completion
426 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
428 // Validate the results
430 const Allocation& bufferAllocation = buffer.getAllocation();
431 invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
433 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
435 for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
437 const int globalOffset = groupNdx * workGroupSize;
438 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
440 const deUint32 res = bufferPtr[globalOffset + localOffset];
441 const deUint32 ref = localOffset + 1;
445 std::ostringstream msg;
446 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
447 return tcu::TestStatus::fail(msg.str());
451 return tcu::TestStatus::pass("Compute succeeded");
454 class SSBOLocalBarrierTest : public vkt::TestCase
457 SSBOLocalBarrierTest (tcu::TestContext& testCtx,
458 const std::string& name,
459 const std::string& description,
460 const tcu::IVec3& localSize,
461 const tcu::IVec3& workSize);
463 void initPrograms (SourceCollections& sourceCollections) const;
464 TestInstance* createInstance (Context& context) const;
467 const tcu::IVec3 m_localSize;
468 const tcu::IVec3 m_workSize;
471 class SSBOLocalBarrierTestInstance : public vkt::TestInstance
474 SSBOLocalBarrierTestInstance (Context& context,
475 const tcu::IVec3& localSize,
476 const tcu::IVec3& workSize);
478 tcu::TestStatus iterate (void);
481 const tcu::IVec3 m_localSize;
482 const tcu::IVec3 m_workSize;
485 SSBOLocalBarrierTest::SSBOLocalBarrierTest (tcu::TestContext& testCtx,
486 const std::string& name,
487 const std::string& description,
488 const tcu::IVec3& localSize,
489 const tcu::IVec3& workSize)
490 : TestCase (testCtx, name, description)
491 , m_localSize (localSize)
492 , m_workSize (workSize)
496 void SSBOLocalBarrierTest::initPrograms (SourceCollections& sourceCollections) const
498 const int workGroupSize = multiplyComponents(m_localSize);
499 const int workGroupCount = multiplyComponents(m_workSize);
500 const int numValues = workGroupSize * workGroupCount;
502 std::ostringstream src;
503 src << "#version 310 es\n"
504 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
505 << "layout(binding = 0) coherent buffer Output {\n"
506 << " uint values[" << numValues << "];\n"
508 << "void main (void) {\n"
509 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
510 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
511 << " uint globalOffs = localSize*globalNdx;\n"
512 << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
514 << " sb_out.values[globalOffs + localOffs] = globalOffs;\n"
515 << " memoryBarrierBuffer();\n"
517 << " sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n" // += so we read and write
518 << " memoryBarrierBuffer();\n"
520 << " sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
523 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
526 TestInstance* SSBOLocalBarrierTest::createInstance (Context& context) const
528 return new SSBOLocalBarrierTestInstance(context, m_localSize, m_workSize);
531 SSBOLocalBarrierTestInstance::SSBOLocalBarrierTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
532 : TestInstance (context)
533 , m_localSize (localSize)
534 , m_workSize (workSize)
538 tcu::TestStatus SSBOLocalBarrierTestInstance::iterate (void)
540 const DeviceInterface& vk = m_context.getDeviceInterface();
541 const VkDevice device = m_context.getDevice();
542 const VkQueue queue = m_context.getUniversalQueue();
543 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
544 Allocator& allocator = m_context.getDefaultAllocator();
546 const int workGroupSize = multiplyComponents(m_localSize);
547 const int workGroupCount = multiplyComponents(m_workSize);
549 // Create a buffer and host-visible memory for it
551 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
552 const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
554 // Create descriptor set
556 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
557 DescriptorSetLayoutBuilder()
558 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
561 const Unique<VkDescriptorPool> descriptorPool(
562 DescriptorPoolBuilder()
563 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
564 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
566 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
568 const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
569 DescriptorSetUpdateBuilder()
570 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
573 // Perform the computation
575 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
576 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
577 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
579 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
581 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
582 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
584 // Start recording commands
586 beginCommandBuffer(vk, *cmdBuffer);
588 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
589 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
591 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
593 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
595 endCommandBuffer(vk, *cmdBuffer);
597 // Wait for completion
599 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
601 // Validate the results
603 const Allocation& bufferAllocation = buffer.getAllocation();
604 invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
606 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
608 for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
610 const int globalOffset = groupNdx * workGroupSize;
611 for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
613 const deUint32 res = bufferPtr[globalOffset + localOffset];
614 const int offs0 = localOffset - 1 < 0 ? ((localOffset + workGroupSize - 1) % workGroupSize) : ((localOffset - 1) % workGroupSize);
615 const int offs1 = localOffset - 2 < 0 ? ((localOffset + workGroupSize - 2) % workGroupSize) : ((localOffset - 2) % workGroupSize);
616 const deUint32 ref = static_cast<deUint32>(globalOffset + offs0 + offs1);
620 std::ostringstream msg;
621 msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
622 return tcu::TestStatus::fail(msg.str());
626 return tcu::TestStatus::pass("Compute succeeded");
629 class CopyImageToSSBOTest : public vkt::TestCase
632 CopyImageToSSBOTest (tcu::TestContext& testCtx,
633 const std::string& name,
634 const std::string& description,
635 const tcu::IVec2& localSize,
636 const tcu::IVec2& imageSize);
638 void initPrograms (SourceCollections& sourceCollections) const;
639 TestInstance* createInstance (Context& context) const;
642 const tcu::IVec2 m_localSize;
643 const tcu::IVec2 m_imageSize;
646 class CopyImageToSSBOTestInstance : public vkt::TestInstance
649 CopyImageToSSBOTestInstance (Context& context,
650 const tcu::IVec2& localSize,
651 const tcu::IVec2& imageSize);
653 tcu::TestStatus iterate (void);
656 const tcu::IVec2 m_localSize;
657 const tcu::IVec2 m_imageSize;
660 CopyImageToSSBOTest::CopyImageToSSBOTest (tcu::TestContext& testCtx,
661 const std::string& name,
662 const std::string& description,
663 const tcu::IVec2& localSize,
664 const tcu::IVec2& imageSize)
665 : TestCase (testCtx, name, description)
666 , m_localSize (localSize)
667 , m_imageSize (imageSize)
669 DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
670 DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
673 void CopyImageToSSBOTest::initPrograms (SourceCollections& sourceCollections) const
675 std::ostringstream src;
676 src << "#version 310 es\n"
677 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
678 << "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_srcImg;\n"
679 << "layout(binding = 0) writeonly buffer Output {\n"
680 << " uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
682 << "void main (void) {\n"
683 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
684 << " uint value = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
685 << " sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
688 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
691 TestInstance* CopyImageToSSBOTest::createInstance (Context& context) const
693 return new CopyImageToSSBOTestInstance(context, m_localSize, m_imageSize);
696 CopyImageToSSBOTestInstance::CopyImageToSSBOTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
697 : TestInstance (context)
698 , m_localSize (localSize)
699 , m_imageSize (imageSize)
703 tcu::TestStatus CopyImageToSSBOTestInstance::iterate (void)
705 const DeviceInterface& vk = m_context.getDeviceInterface();
706 const VkDevice device = m_context.getDevice();
707 const VkQueue queue = m_context.getUniversalQueue();
708 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
709 Allocator& allocator = m_context.getDefaultAllocator();
713 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
714 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
716 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
717 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
719 // Staging buffer (source data for image)
721 const deUint32 imageArea = multiplyComponents(m_imageSize);
722 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
724 const Buffer stagingBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);
726 // Populate the staging buffer with test data
728 de::Random rnd(0xab2c7);
729 const Allocation& stagingBufferAllocation = stagingBuffer.getAllocation();
730 deUint32* bufferPtr = static_cast<deUint32*>(stagingBufferAllocation.getHostPtr());
731 for (deUint32 i = 0; i < imageArea; ++i)
732 *bufferPtr++ = rnd.getUint32();
734 flushMappedMemoryRange(vk, device, stagingBufferAllocation.getMemory(), stagingBufferAllocation.getOffset(), bufferSizeBytes);
737 // Create a buffer to store shader output
739 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
741 // Create descriptor set
743 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
744 DescriptorSetLayoutBuilder()
745 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
746 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
749 const Unique<VkDescriptorPool> descriptorPool(
750 DescriptorPoolBuilder()
751 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
752 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
753 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
755 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
759 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
760 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
762 DescriptorSetUpdateBuilder()
763 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
764 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
767 // Perform the computation
769 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
770 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
771 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
773 const VkBufferMemoryBarrier stagingBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *stagingBuffer, 0ull, bufferSizeBytes);
775 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
777 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
778 *image, subresourceRange);
780 const VkImageMemoryBarrier imagePostCopyBarrier = makeImageMemoryBarrier(
781 VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
782 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
783 *image, subresourceRange);
785 const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
787 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
788 const tcu::IVec2 workSize = m_imageSize / m_localSize;
790 // Prepare the command buffer
792 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
793 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
795 // Start recording commands
797 beginCommandBuffer(vk, *cmdBuffer);
799 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
800 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
802 vk.cmdPipelineBarrier(*cmdBuffer, 0u, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &stagingBufferPostHostWriteBarrier, 1, &imagePreCopyBarrier);
803 vk.cmdCopyBufferToImage(*cmdBuffer, *stagingBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, ©Params);
804 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePostCopyBarrier);
806 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
807 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
809 endCommandBuffer(vk, *cmdBuffer);
811 // Wait for completion
813 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
816 // Validate the results
818 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
819 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
821 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
822 const deUint32* refBufferPtr = static_cast<deUint32*>(stagingBuffer.getAllocation().getHostPtr());
824 for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
826 const deUint32 res = *(bufferPtr + ndx);
827 const deUint32 ref = *(refBufferPtr + ndx);
831 std::ostringstream msg;
832 msg << "Comparison failed for Output.values[" << ndx << "]";
833 return tcu::TestStatus::fail(msg.str());
836 return tcu::TestStatus::pass("Compute succeeded");
839 class CopySSBOToImageTest : public vkt::TestCase
842 CopySSBOToImageTest (tcu::TestContext& testCtx,
843 const std::string& name,
844 const std::string& description,
845 const tcu::IVec2& localSize,
846 const tcu::IVec2& imageSize);
848 void initPrograms (SourceCollections& sourceCollections) const;
849 TestInstance* createInstance (Context& context) const;
852 const tcu::IVec2 m_localSize;
853 const tcu::IVec2 m_imageSize;
856 class CopySSBOToImageTestInstance : public vkt::TestInstance
859 CopySSBOToImageTestInstance (Context& context,
860 const tcu::IVec2& localSize,
861 const tcu::IVec2& imageSize);
863 tcu::TestStatus iterate (void);
866 const tcu::IVec2 m_localSize;
867 const tcu::IVec2 m_imageSize;
870 CopySSBOToImageTest::CopySSBOToImageTest (tcu::TestContext& testCtx,
871 const std::string& name,
872 const std::string& description,
873 const tcu::IVec2& localSize,
874 const tcu::IVec2& imageSize)
875 : TestCase (testCtx, name, description)
876 , m_localSize (localSize)
877 , m_imageSize (imageSize)
879 DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
880 DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
883 void CopySSBOToImageTest::initPrograms (SourceCollections& sourceCollections) const
885 std::ostringstream src;
886 src << "#version 310 es\n"
887 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
888 << "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_dstImg;\n"
889 << "layout(binding = 0) readonly buffer Input {\n"
890 << " uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
892 << "void main (void) {\n"
893 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
894 << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
895 << " imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
898 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
901 TestInstance* CopySSBOToImageTest::createInstance (Context& context) const
903 return new CopySSBOToImageTestInstance(context, m_localSize, m_imageSize);
906 CopySSBOToImageTestInstance::CopySSBOToImageTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
907 : TestInstance (context)
908 , m_localSize (localSize)
909 , m_imageSize (imageSize)
913 tcu::TestStatus CopySSBOToImageTestInstance::iterate (void)
915 const DeviceInterface& vk = m_context.getDeviceInterface();
916 const VkDevice device = m_context.getDevice();
917 const VkQueue queue = m_context.getUniversalQueue();
918 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
919 Allocator& allocator = m_context.getDefaultAllocator();
923 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
924 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
926 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
927 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
929 // Create an input buffer (data to be read in the shader)
931 const deUint32 imageArea = multiplyComponents(m_imageSize);
932 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;
934 const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
936 // Populate the buffer with test data
938 de::Random rnd(0x77238ac2);
939 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
940 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
941 for (deUint32 i = 0; i < imageArea; ++i)
942 *bufferPtr++ = rnd.getUint32();
944 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
947 // Create a buffer to store shader output (copied from image data)
949 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
951 // Create descriptor set
953 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
954 DescriptorSetLayoutBuilder()
955 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
956 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
959 const Unique<VkDescriptorPool> descriptorPool(
960 DescriptorPoolBuilder()
961 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
962 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
963 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
965 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
969 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
970 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
972 DescriptorSetUpdateBuilder()
973 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
974 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
977 // Perform the computation
979 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
980 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
981 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
983 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
985 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
987 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
988 *image, subresourceRange);
990 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
991 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
992 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
993 *image, subresourceRange);
995 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
997 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
998 const tcu::IVec2 workSize = m_imageSize / m_localSize;
1000 // Prepare the command buffer
1002 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1003 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1005 // Start recording commands
1007 beginCommandBuffer(vk, *cmdBuffer);
1009 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1010 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1012 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
1013 vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
1015 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
1016 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, ©Params);
1017 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1019 endCommandBuffer(vk, *cmdBuffer);
1021 // Wait for completion
1023 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1026 // Validate the results
1028 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1029 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1031 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1032 const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
1034 for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
1036 const deUint32 res = *(bufferPtr + ndx);
1037 const deUint32 ref = *(refBufferPtr + ndx);
1041 std::ostringstream msg;
1042 msg << "Comparison failed for pixel " << ndx;
1043 return tcu::TestStatus::fail(msg.str());
1046 return tcu::TestStatus::pass("Compute succeeded");
1049 class BufferToBufferInvertTest : public vkt::TestCase
1052 void initPrograms (SourceCollections& sourceCollections) const;
1053 TestInstance* createInstance (Context& context) const;
1055 static BufferToBufferInvertTest* UBOToSSBOInvertCase (tcu::TestContext& testCtx,
1056 const std::string& name,
1057 const std::string& description,
1058 const deUint32 numValues,
1059 const tcu::IVec3& localSize,
1060 const tcu::IVec3& workSize);
1062 static BufferToBufferInvertTest* CopyInvertSSBOCase (tcu::TestContext& testCtx,
1063 const std::string& name,
1064 const std::string& description,
1065 const deUint32 numValues,
1066 const tcu::IVec3& localSize,
1067 const tcu::IVec3& workSize);
1070 BufferToBufferInvertTest (tcu::TestContext& testCtx,
1071 const std::string& name,
1072 const std::string& description,
1073 const deUint32 numValues,
1074 const tcu::IVec3& localSize,
1075 const tcu::IVec3& workSize,
1076 const BufferType bufferType);
1078 const BufferType m_bufferType;
1079 const deUint32 m_numValues;
1080 const tcu::IVec3 m_localSize;
1081 const tcu::IVec3 m_workSize;
1084 class BufferToBufferInvertTestInstance : public vkt::TestInstance
1087 BufferToBufferInvertTestInstance (Context& context,
1088 const deUint32 numValues,
1089 const tcu::IVec3& localSize,
1090 const tcu::IVec3& workSize,
1091 const BufferType bufferType);
1093 tcu::TestStatus iterate (void);
1096 const BufferType m_bufferType;
1097 const deUint32 m_numValues;
1098 const tcu::IVec3 m_localSize;
1099 const tcu::IVec3 m_workSize;
1102 BufferToBufferInvertTest::BufferToBufferInvertTest (tcu::TestContext& testCtx,
1103 const std::string& name,
1104 const std::string& description,
1105 const deUint32 numValues,
1106 const tcu::IVec3& localSize,
1107 const tcu::IVec3& workSize,
1108 const BufferType bufferType)
1109 : TestCase (testCtx, name, description)
1110 , m_bufferType (bufferType)
1111 , m_numValues (numValues)
1112 , m_localSize (localSize)
1113 , m_workSize (workSize)
1115 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1116 DE_ASSERT(m_bufferType == BUFFER_TYPE_UNIFORM || m_bufferType == BUFFER_TYPE_SSBO);
1119 BufferToBufferInvertTest* BufferToBufferInvertTest::UBOToSSBOInvertCase (tcu::TestContext& testCtx,
1120 const std::string& name,
1121 const std::string& description,
1122 const deUint32 numValues,
1123 const tcu::IVec3& localSize,
1124 const tcu::IVec3& workSize)
1126 return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_UNIFORM);
1129 BufferToBufferInvertTest* BufferToBufferInvertTest::CopyInvertSSBOCase (tcu::TestContext& testCtx,
1130 const std::string& name,
1131 const std::string& description,
1132 const deUint32 numValues,
1133 const tcu::IVec3& localSize,
1134 const tcu::IVec3& workSize)
1136 return new BufferToBufferInvertTest(testCtx, name, description, numValues, localSize, workSize, BUFFER_TYPE_SSBO);
1139 void BufferToBufferInvertTest::initPrograms (SourceCollections& sourceCollections) const
1141 std::ostringstream src;
1142 if (m_bufferType == BUFFER_TYPE_UNIFORM)
1144 src << "#version 310 es\n"
1145 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1146 << "layout(binding = 0) readonly uniform Input {\n"
1147 << " uint values[" << m_numValues << "];\n"
1149 << "layout(binding = 1, std140) writeonly buffer Output {\n"
1150 << " uint values[" << m_numValues << "];\n"
1152 << "void main (void) {\n"
1153 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1154 << " uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
1155 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1156 << " uint offset = numValuesPerInv*groupNdx;\n"
1158 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1159 << " sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
1162 else if (m_bufferType == BUFFER_TYPE_SSBO)
1164 src << "#version 310 es\n"
1165 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1166 << "layout(binding = 0, std140) readonly buffer Input {\n"
1167 << " uint values[" << m_numValues << "];\n"
1169 << "layout (binding = 1, std140) writeonly buffer Output {\n"
1170 << " uint values[" << m_numValues << "];\n"
1172 << "void main (void) {\n"
1173 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1174 << " uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
1175 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1176 << " uint offset = numValuesPerInv*groupNdx;\n"
1178 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1179 << " sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
1183 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1186 TestInstance* BufferToBufferInvertTest::createInstance (Context& context) const
1188 return new BufferToBufferInvertTestInstance(context, m_numValues, m_localSize, m_workSize, m_bufferType);
1191 BufferToBufferInvertTestInstance::BufferToBufferInvertTestInstance (Context& context,
1192 const deUint32 numValues,
1193 const tcu::IVec3& localSize,
1194 const tcu::IVec3& workSize,
1195 const BufferType bufferType)
1196 : TestInstance (context)
1197 , m_bufferType (bufferType)
1198 , m_numValues (numValues)
1199 , m_localSize (localSize)
1200 , m_workSize (workSize)
1204 tcu::TestStatus BufferToBufferInvertTestInstance::iterate (void)
1206 const DeviceInterface& vk = m_context.getDeviceInterface();
1207 const VkDevice device = m_context.getDevice();
1208 const VkQueue queue = m_context.getUniversalQueue();
1209 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1210 Allocator& allocator = m_context.getDefaultAllocator();
1212 // Customize the test based on buffer type
1214 const VkBufferUsageFlags inputBufferUsageFlags = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1215 const VkDescriptorType inputBufferDescriptorType = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
1216 const deUint32 randomSeed = (m_bufferType == BUFFER_TYPE_UNIFORM ? 0x111223f : 0x124fef);
1218 // Create an input buffer
1220 const VkDeviceSize bufferSizeBytes = sizeof(tcu::UVec4) * m_numValues;
1221 const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, inputBufferUsageFlags), MemoryRequirement::HostVisible);
1223 // Fill the input buffer with data
1225 de::Random rnd(randomSeed);
1226 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
1227 tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(inputBufferAllocation.getHostPtr());
1228 for (deUint32 i = 0; i < m_numValues; ++i)
1229 bufferPtr[i].x() = rnd.getUint32();
1231 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), bufferSizeBytes);
1234 // Create an output buffer
1236 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1238 // Create descriptor set
1240 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1241 DescriptorSetLayoutBuilder()
1242 .addSingleBinding(inputBufferDescriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1243 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1244 .build(vk, device));
1246 const Unique<VkDescriptorPool> descriptorPool(
1247 DescriptorPoolBuilder()
1248 .addType(inputBufferDescriptorType)
1249 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1250 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1252 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1254 const VkDescriptorBufferInfo inputBufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
1255 const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
1256 DescriptorSetUpdateBuilder()
1257 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), inputBufferDescriptorType, &inputBufferDescriptorInfo)
1258 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1259 .update(vk, device);
1261 // Perform the computation
1263 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1264 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1265 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1267 const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
1269 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1271 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1272 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1274 // Start recording commands
1276 beginCommandBuffer(vk, *cmdBuffer);
1278 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1279 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1281 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1282 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1283 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1285 endCommandBuffer(vk, *cmdBuffer);
1287 // Wait for completion
1289 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1291 // Validate the results
1293 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1294 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), bufferSizeBytes);
1296 const tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(outputBufferAllocation.getHostPtr());
1297 const tcu::UVec4* refBufferPtr = static_cast<tcu::UVec4*>(inputBuffer.getAllocation().getHostPtr());
1299 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1301 const deUint32 res = bufferPtr[ndx].x();
1302 const deUint32 ref = ~refBufferPtr[ndx].x();
1306 std::ostringstream msg;
1307 msg << "Comparison failed for Output.values[" << ndx << "]";
1308 return tcu::TestStatus::fail(msg.str());
1311 return tcu::TestStatus::pass("Compute succeeded");
1314 class InvertSSBOInPlaceTest : public vkt::TestCase
1317 InvertSSBOInPlaceTest (tcu::TestContext& testCtx,
1318 const std::string& name,
1319 const std::string& description,
1320 const deUint32 numValues,
1322 const tcu::IVec3& localSize,
1323 const tcu::IVec3& workSize);
1326 void initPrograms (SourceCollections& sourceCollections) const;
1327 TestInstance* createInstance (Context& context) const;
1330 const deUint32 m_numValues;
1332 const tcu::IVec3 m_localSize;
1333 const tcu::IVec3 m_workSize;
1336 class InvertSSBOInPlaceTestInstance : public vkt::TestInstance
1339 InvertSSBOInPlaceTestInstance (Context& context,
1340 const deUint32 numValues,
1341 const tcu::IVec3& localSize,
1342 const tcu::IVec3& workSize);
1344 tcu::TestStatus iterate (void);
1347 const deUint32 m_numValues;
1348 const tcu::IVec3 m_localSize;
1349 const tcu::IVec3 m_workSize;
1352 InvertSSBOInPlaceTest::InvertSSBOInPlaceTest (tcu::TestContext& testCtx,
1353 const std::string& name,
1354 const std::string& description,
1355 const deUint32 numValues,
1357 const tcu::IVec3& localSize,
1358 const tcu::IVec3& workSize)
1359 : TestCase (testCtx, name, description)
1360 , m_numValues (numValues)
1362 , m_localSize (localSize)
1363 , m_workSize (workSize)
1365 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1368 void InvertSSBOInPlaceTest::initPrograms (SourceCollections& sourceCollections) const
1370 std::ostringstream src;
1371 src << "#version 310 es\n"
1372 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1373 << "layout(binding = 0) buffer InOut {\n"
1374 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1376 << "void main (void) {\n"
1377 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1378 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
1379 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1380 << " uint offset = numValuesPerInv*groupNdx;\n"
1382 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1383 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
1386 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1389 TestInstance* InvertSSBOInPlaceTest::createInstance (Context& context) const
1391 return new InvertSSBOInPlaceTestInstance(context, m_numValues, m_localSize, m_workSize);
1394 InvertSSBOInPlaceTestInstance::InvertSSBOInPlaceTestInstance (Context& context,
1395 const deUint32 numValues,
1396 const tcu::IVec3& localSize,
1397 const tcu::IVec3& workSize)
1398 : TestInstance (context)
1399 , m_numValues (numValues)
1400 , m_localSize (localSize)
1401 , m_workSize (workSize)
1405 tcu::TestStatus InvertSSBOInPlaceTestInstance::iterate (void)
1407 const DeviceInterface& vk = m_context.getDeviceInterface();
1408 const VkDevice device = m_context.getDevice();
1409 const VkQueue queue = m_context.getUniversalQueue();
1410 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1411 Allocator& allocator = m_context.getDefaultAllocator();
1413 // Create an input/output buffer
1415 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1416 const Buffer buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1418 // Fill the buffer with data
1420 typedef std::vector<deUint32> data_vector_t;
1421 data_vector_t inputData(m_numValues);
1424 de::Random rnd(0x82ce7f);
1425 const Allocation& bufferAllocation = buffer.getAllocation();
1426 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1427 for (deUint32 i = 0; i < m_numValues; ++i)
1428 inputData[i] = *bufferPtr++ = rnd.getUint32();
1430 flushMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1433 // Create descriptor set
1435 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1436 DescriptorSetLayoutBuilder()
1437 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1438 .build(vk, device));
1440 const Unique<VkDescriptorPool> descriptorPool(
1441 DescriptorPoolBuilder()
1442 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1443 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1445 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1447 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
1448 DescriptorSetUpdateBuilder()
1449 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
1450 .update(vk, device);
1452 // Perform the computation
1454 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1455 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1456 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
1458 const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1460 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1462 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1463 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1465 // Start recording commands
1467 beginCommandBuffer(vk, *cmdBuffer);
1469 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1470 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1472 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1473 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1474 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1476 endCommandBuffer(vk, *cmdBuffer);
1478 // Wait for completion
1480 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1482 // Validate the results
1484 const Allocation& bufferAllocation = buffer.getAllocation();
1485 invalidateMappedMemoryRange(vk, device, bufferAllocation.getMemory(), bufferAllocation.getOffset(), bufferSizeBytes);
1487 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
1489 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1491 const deUint32 res = bufferPtr[ndx];
1492 const deUint32 ref = ~inputData[ndx];
1496 std::ostringstream msg;
1497 msg << "Comparison failed for InOut.values[" << ndx << "]";
1498 return tcu::TestStatus::fail(msg.str());
1501 return tcu::TestStatus::pass("Compute succeeded");
1504 class WriteToMultipleSSBOTest : public vkt::TestCase
1507 WriteToMultipleSSBOTest (tcu::TestContext& testCtx,
1508 const std::string& name,
1509 const std::string& description,
1510 const deUint32 numValues,
1512 const tcu::IVec3& localSize,
1513 const tcu::IVec3& workSize);
1515 void initPrograms (SourceCollections& sourceCollections) const;
1516 TestInstance* createInstance (Context& context) const;
1519 const deUint32 m_numValues;
1521 const tcu::IVec3 m_localSize;
1522 const tcu::IVec3 m_workSize;
1525 class WriteToMultipleSSBOTestInstance : public vkt::TestInstance
1528 WriteToMultipleSSBOTestInstance (Context& context,
1529 const deUint32 numValues,
1530 const tcu::IVec3& localSize,
1531 const tcu::IVec3& workSize);
1533 tcu::TestStatus iterate (void);
1536 const deUint32 m_numValues;
1537 const tcu::IVec3 m_localSize;
1538 const tcu::IVec3 m_workSize;
1541 WriteToMultipleSSBOTest::WriteToMultipleSSBOTest (tcu::TestContext& testCtx,
1542 const std::string& name,
1543 const std::string& description,
1544 const deUint32 numValues,
1546 const tcu::IVec3& localSize,
1547 const tcu::IVec3& workSize)
1548 : TestCase (testCtx, name, description)
1549 , m_numValues (numValues)
1551 , m_localSize (localSize)
1552 , m_workSize (workSize)
1554 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1557 void WriteToMultipleSSBOTest::initPrograms (SourceCollections& sourceCollections) const
1559 std::ostringstream src;
1560 src << "#version 310 es\n"
1561 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1562 << "layout(binding = 0) writeonly buffer Out0 {\n"
1563 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1565 << "layout(binding = 1) writeonly buffer Out1 {\n"
1566 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1568 << "void main (void) {\n"
1569 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1570 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1573 << " uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
1574 << " uint offset = numValuesPerInv*groupNdx;\n"
1576 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1577 << " sb_out0.values[offset + ndx] = offset + ndx;\n"
1580 << " uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
1581 << " uint offset = numValuesPerInv*groupNdx;\n"
1583 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1584 << " sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
1588 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1591 TestInstance* WriteToMultipleSSBOTest::createInstance (Context& context) const
1593 return new WriteToMultipleSSBOTestInstance(context, m_numValues, m_localSize, m_workSize);
1596 WriteToMultipleSSBOTestInstance::WriteToMultipleSSBOTestInstance (Context& context,
1597 const deUint32 numValues,
1598 const tcu::IVec3& localSize,
1599 const tcu::IVec3& workSize)
1600 : TestInstance (context)
1601 , m_numValues (numValues)
1602 , m_localSize (localSize)
1603 , m_workSize (workSize)
// Execution for WriteToMultipleSSBOTest: binds two host-visible storage
// buffers, runs one compute dispatch whose shader fills sb_out0 with
// ascending values and sb_out1 with descending values, then maps both
// buffers on the host and verifies every element.
// NOTE(review): this excerpt has original lines elided (standalone braces
// and the per-element `if (res != ref)` conditions before each failure
// message) — comments below describe only the visible code.
1607 tcu::TestStatus WriteToMultipleSSBOTestInstance::iterate (void)
1609 const DeviceInterface& vk = m_context.getDeviceInterface();
1610 const VkDevice device = m_context.getDevice();
1611 const VkQueue queue = m_context.getUniversalQueue();
1612 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1613 Allocator& allocator = m_context.getDefaultAllocator();
1615 // Create two output buffers
1617 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
1618 const Buffer buffer0(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1619 const Buffer buffer1(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1621 // Create descriptor set
// Two storage-buffer bindings (0 and 1), matching sb_out0/sb_out1 in the shader.
1623 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1624 DescriptorSetLayoutBuilder()
1625 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1626 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1627 .build(vk, device));
1629 const Unique<VkDescriptorPool> descriptorPool(
1630 DescriptorPoolBuilder()
1631 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1632 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1634 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1636 const VkDescriptorBufferInfo buffer0DescriptorInfo = makeDescriptorBufferInfo(*buffer0, 0ull, bufferSizeBytes);
1637 const VkDescriptorBufferInfo buffer1DescriptorInfo = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
1638 DescriptorSetUpdateBuilder()
1639 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer0DescriptorInfo)
1640 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer1DescriptorInfo)
1641 .update(vk, device);
1643 // Perform the computation
1645 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
1646 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1647 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
// One barrier per output buffer: make shader writes available to host reads
// after the dispatch.
1649 const VkBufferMemoryBarrier shaderWriteBarriers[] =
1651 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer0, 0ull, bufferSizeBytes),
1652 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes)
1655 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1656 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1658 // Start recording commands
1660 beginCommandBuffer(vk, *cmdBuffer);
1662 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1663 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1665 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1666 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, DE_LENGTH_OF_ARRAY(shaderWriteBarriers), shaderWriteBarriers, 0, (const VkImageMemoryBarrier*)DE_NULL);
1668 endCommandBuffer(vk, *cmdBuffer);
1670 // Wait for completion
1672 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1674 // Validate the results
// Invalidate before reading: the allocation may be host-visible but
// non-coherent.
1676 const Allocation& buffer0Allocation = buffer0.getAllocation();
1677 invalidateMappedMemoryRange(vk, device, buffer0Allocation.getMemory(), buffer0Allocation.getOffset(), bufferSizeBytes);
1678 const deUint32* buffer0Ptr = static_cast<deUint32*>(buffer0Allocation.getHostPtr());
// sb_out0 must contain the identity sequence 0..m_numValues-1.
1680 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1682 const deUint32 res = buffer0Ptr[ndx];
1683 const deUint32 ref = ndx;
// NOTE(review): the `if (res != ref)` guard at original lines 1684-1686 is
// elided from this excerpt.
1687 std::ostringstream msg;
1688 msg << "Comparison failed for Out0.values[" << ndx << "] res=" << res << " ref=" << ref;
1689 return tcu::TestStatus::fail(msg.str());
1694 const Allocation& buffer1Allocation = buffer1.getAllocation();
1695 invalidateMappedMemoryRange(vk, device, buffer1Allocation.getMemory(), buffer1Allocation.getOffset(), bufferSizeBytes);
1696 const deUint32* buffer1Ptr = static_cast<deUint32*>(buffer1Allocation.getHostPtr());
// sb_out1 holds length - offset - ndx per the shader, i.e. m_numValues - ndx.
1698 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
1700 const deUint32 res = buffer1Ptr[ndx];
1701 const deUint32 ref = m_numValues - ndx;
// NOTE(review): the matching `if (res != ref)` guard is elided here as well.
1705 std::ostringstream msg;
1706 msg << "Comparison failed for Out1.values[" << ndx << "] res=" << res << " ref=" << ref;
1707 return tcu::TestStatus::fail(msg.str());
1711 return tcu::TestStatus::pass("Compute succeeded");
// Test case: verifies that a pipeline barrier between two compute dispatches
// makes SSBO writes from the first shader visible to reads in the second.
// NOTE(review): class braces and access-specifier lines are elided in this
// excerpt.
1714 class SSBOBarrierTest : public vkt::TestCase
1717 SSBOBarrierTest (tcu::TestContext& testCtx,
1718 const std::string& name,
1719 const std::string& description,
1720 const tcu::IVec3& workSize);
1722 void initPrograms (SourceCollections& sourceCollections) const;
1723 TestInstance* createInstance (Context& context) const;
// Work-group counts used for both dispatches (local size is 1 in the shaders).
1726 const tcu::IVec3 m_workSize;
// Per-execution instance for SSBOBarrierTest; all work happens in iterate().
// NOTE(review): class braces and access-specifier lines are elided in this
// excerpt.
1729 class SSBOBarrierTestInstance : public vkt::TestInstance
1732 SSBOBarrierTestInstance (Context& context,
1733 const tcu::IVec3& workSize);
1735 tcu::TestStatus iterate (void);
1738 const tcu::IVec3 m_workSize;
1741 SSBOBarrierTest::SSBOBarrierTest (tcu::TestContext& testCtx,
1742 const std::string& name,
1743 const std::string& description,
1744 const tcu::IVec3& workSize)
1745 : TestCase (testCtx, name, description)
1746 , m_workSize (workSize)
// Builds the two GLSL compute programs: "comp0" writes (u_baseVal + offset)
// per work group into an SSBO at binding 1; "comp1" reads those values back
// and atomically accumulates them into `sum` in the SSBO at binding 0.
// NOTE(review): several shader-source string lines are elided in this
// excerpt (the "#version 310 es\n" headers and the "};\n" block
// terminators, among others) — do not treat the visible strings as the
// complete shader text.
1750 void SSBOBarrierTest::initPrograms (SourceCollections& sourceCollections) const
1752 sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
1754 "layout (local_size_x = 1) in;\n"
1755 "layout(binding = 2) readonly uniform Constants {\n"
1756 " uint u_baseVal;\n"
1758 "layout(binding = 1) writeonly buffer Output {\n"
1761 "void main (void) {\n"
// Flattened 3D work-group index: one output element per work group.
1762 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1763 " values[offset] = u_baseVal + offset;\n"
1766 sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
1768 "layout (local_size_x = 1) in;\n"
1769 "layout(binding = 1) readonly buffer Input {\n"
1772 "layout(binding = 0) coherent buffer Output {\n"
1775 "void main (void) {\n"
1776 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1777 " uint value = values[offset];\n"
// Atomic accumulation: work groups run in any order, so the final sum is
// order-independent.
1778 " atomicAdd(sum, value);\n"
1782 TestInstance* SSBOBarrierTest::createInstance (Context& context) const
1784 return new SSBOBarrierTestInstance(context, m_workSize);
1787 SSBOBarrierTestInstance::SSBOBarrierTestInstance (Context& context, const tcu::IVec3& workSize)
1788 : TestInstance (context)
1789 , m_workSize (workSize)
// Execution for SSBOBarrierTest: dispatch "comp0" (writes per-group values
// to the work buffer), issue a compute->compute buffer barrier, dispatch
// "comp1" (atomically sums the work buffer into the output buffer), then
// read back the single-uint sum and compare against a host-computed
// reference.
// NOTE(review): this excerpt has original lines elided (standalone braces,
// the `deUint32 ref = 0;` accumulator declaration, and the final
// `if (res != ref)` condition).
1793 tcu::TestStatus SSBOBarrierTestInstance::iterate (void)
1795 const DeviceInterface& vk = m_context.getDeviceInterface();
1796 const VkDevice device = m_context.getDevice();
1797 const VkQueue queue = m_context.getUniversalQueue();
1798 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1799 Allocator& allocator = m_context.getDefaultAllocator();
1801 // Create a work buffer used by both shaders
// Device-local is fine (MemoryRequirement::Any): the host never maps it.
1803 const int workGroupCount = multiplyComponents(m_workSize);
1804 const VkDeviceSize workBufferSizeBytes = sizeof(deUint32) * workGroupCount;
1805 const Buffer workBuffer(vk, device, allocator, makeBufferCreateInfo(workBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::Any);
1807 // Create an output buffer
1809 const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
1810 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
1812 // Initialize atomic counter value to zero
1814 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1815 deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1816 *outputBufferPtr = 0;
1817 flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1820 // Create a uniform buffer (to pass uniform constants)
1822 const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
1823 const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
1825 // Set the constants in the uniform buffer
1827 const deUint32 baseValue = 127;
1829 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
1830 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
1831 uniformBufferPtr[0] = baseValue;
1833 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
1836 // Create descriptor set
// Bindings mirror the shader interface: 0 = output SSBO, 1 = work SSBO,
// 2 = uniform constants.
1838 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1839 DescriptorSetLayoutBuilder()
1840 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1841 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1842 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1843 .build(vk, device));
1845 const Unique<VkDescriptorPool> descriptorPool(
1846 DescriptorPoolBuilder()
1847 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1848 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1849 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1851 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1853 const VkDescriptorBufferInfo workBufferDescriptorInfo = makeDescriptorBufferInfo(*workBuffer, 0ull, workBufferSizeBytes);
1854 const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
1855 const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
1856 DescriptorSetUpdateBuilder()
1857 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1858 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &workBufferDescriptorInfo)
1859 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
1860 .update(vk, device);
1862 // Perform the computation
// Both pipelines share one layout, so the descriptor set stays bound across
// the pipeline switch below.
1864 const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
1865 const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
1867 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
1868 const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
1869 const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
// Host write -> uniform read for the constants; shader write -> shader read
// between the two dispatches (the barrier under test); shader write -> host
// read before the result readback.
1871 const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
1873 const VkBufferMemoryBarrier betweenShadersBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *workBuffer, 0ull, workBufferSizeBytes);
1875 const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
1877 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1878 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
1880 // Start recording commands
1882 beginCommandBuffer(vk, *cmdBuffer);
1884 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
1885 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
1887 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1889 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1890 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &betweenShadersBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1892 // Switch to the second shader program
1893 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
1895 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1896 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
1898 endCommandBuffer(vk, *cmdBuffer);
1900 // Wait for completion
1902 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1904 // Validate the results
1906 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
1907 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
1909 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
1910 const deUint32 res = *bufferPtr;
// Reference: sum of (baseValue + groupNdx) over all work groups.
// NOTE(review): the `deUint32 ref = 0;` declaration (original line ~1911)
// and the `if (res != ref)` guard are elided from this excerpt.
1913 for (int ndx = 0; ndx < workGroupCount; ++ndx)
1914 ref += baseValue + ndx;
1918 std::ostringstream msg;
1919 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
1920 return tcu::TestStatus::fail(msg.str());
1922 return tcu::TestStatus::pass("Compute succeeded");
// Test case: each work group of m_localSize invocations performs
// imageAtomicAdd on one pixel of an r32ui image; the result is copied to a
// buffer and compared against per-pixel sums computed on the host.
// NOTE(review): class braces and access-specifier lines are elided in this
// excerpt.
1925 class ImageAtomicOpTest : public vkt::TestCase
1928 ImageAtomicOpTest (tcu::TestContext& testCtx,
1929 const std::string& name,
1930 const std::string& description,
1931 const deUint32 localSize,
1932 const tcu::IVec2& imageSize);
1934 void initPrograms (SourceCollections& sourceCollections) const;
1935 TestInstance* createInstance (Context& context) const;
// Invocations per work group (= addends per pixel).
1938 const deUint32 m_localSize;
// Image dimensions; one work group per pixel.
1939 const tcu::IVec2 m_imageSize;
// Per-execution instance for ImageAtomicOpTest; all work happens in iterate().
// NOTE(review): class braces and access-specifier lines are elided in this
// excerpt.
1942 class ImageAtomicOpTestInstance : public vkt::TestInstance
1945 ImageAtomicOpTestInstance (Context& context,
1946 const deUint32 localSize,
1947 const tcu::IVec2& imageSize);
1949 tcu::TestStatus iterate (void);
1952 const deUint32 m_localSize;
1953 const tcu::IVec2 m_imageSize;
1956 ImageAtomicOpTest::ImageAtomicOpTest (tcu::TestContext& testCtx,
1957 const std::string& name,
1958 const std::string& description,
1959 const deUint32 localSize,
1960 const tcu::IVec2& imageSize)
1961 : TestCase (testCtx, name, description)
1962 , m_localSize (localSize)
1963 , m_imageSize (imageSize)
// Builds the compute shader: invocation 0 of each work group clears its
// pixel, then every invocation in the group atomically adds its input value
// to that pixel. Requires GL_OES_shader_image_atomic for imageAtomicAdd on
// a uimage2D.
// NOTE(review): some shader-source string lines are elided in this excerpt
// (e.g. the "};\n" closing the Input block and a barrier() call that
// presumably sits between the clear and the atomic adds — confirm against
// the full file).
1967 void ImageAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
1969 std::ostringstream src;
1970 src << "#version 310 es\n"
1971 << "#extension GL_OES_shader_image_atomic : require\n"
1972 << "layout (local_size_x = " << m_localSize << ") in;\n"
1973 << "layout(binding = 1, r32ui) coherent uniform highp uimage2D u_dstImg;\n"
1974 << "layout(binding = 0) readonly buffer Input {\n"
// Array size is baked in: one input value per invocation over the whole image.
1975 << " uint values[" << (multiplyComponents(m_imageSize) * m_localSize) << "];\n"
1977 << "void main (void) {\n"
1978 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1979 << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1981 << " if (gl_LocalInvocationIndex == 0u)\n"
1982 << " imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1983 << " memoryBarrierImage();\n"
1985 << " imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
1988 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1991 TestInstance* ImageAtomicOpTest::createInstance (Context& context) const
1993 return new ImageAtomicOpTestInstance(context, m_localSize, m_imageSize);
1996 ImageAtomicOpTestInstance::ImageAtomicOpTestInstance (Context& context, const deUint32 localSize, const tcu::IVec2& imageSize)
1997 : TestInstance (context)
1998 , m_localSize (localSize)
1999 , m_imageSize (imageSize)
// Execution for ImageAtomicOpTest: fills an input SSBO with random uints,
// dispatches one work group per pixel (each invocation atomically adds its
// value to the group's pixel), copies the image into a host-visible buffer,
// and compares each pixel against the host-side sum of its m_localSize
// input values.
// NOTE(review): this excerpt has original lines elided (standalone braces,
// the `deUint32 ref = 0;` accumulator and the `if (res != ref)` guard in
// the validation loop). Also no UNDEFINED->GENERAL layout transition for
// the storage image is visible here — confirm it is among the elided lines.
2003 tcu::TestStatus ImageAtomicOpTestInstance::iterate (void)
2005 const DeviceInterface& vk = m_context.getDeviceInterface();
2006 const VkDevice device = m_context.getDevice();
2007 const VkQueue queue = m_context.getUniversalQueue();
2008 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2009 Allocator& allocator = m_context.getDefaultAllocator();
// The image is written via imageAtomicAdd (STORAGE) and read back via a
// copy-to-buffer (TRANSFER_SRC).
2013 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
2014 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2016 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2017 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2021 const deUint32 numInputValues = multiplyComponents(m_imageSize) * m_localSize;
2022 const VkDeviceSize inputBufferSizeBytes = sizeof(deUint32) * numInputValues;
2024 const Buffer inputBuffer(vk, device, allocator, makeBufferCreateInfo(inputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2026 // Populate the input buffer with test data
// Fixed seed keeps the test deterministic across runs.
2028 de::Random rnd(0x77238ac2);
2029 const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
2030 deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
2031 for (deUint32 i = 0; i < numInputValues; ++i)
2032 *bufferPtr++ = rnd.getUint32();
2034 flushMappedMemoryRange(vk, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), inputBufferSizeBytes);
2037 // Create a buffer to store shader output (copied from image data)
2039 const deUint32 imageArea = multiplyComponents(m_imageSize);
2040 const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32) * imageArea;
2041 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
2043 // Create descriptor set
2045 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2046 DescriptorSetLayoutBuilder()
2047 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2048 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2049 .build(vk, device));
2051 const Unique<VkDescriptorPool> descriptorPool(
2052 DescriptorPoolBuilder()
2053 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2054 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2055 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2057 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
// Storage images must be bound in VK_IMAGE_LAYOUT_GENERAL.
2061 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2062 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, inputBufferSizeBytes);
2064 DescriptorSetUpdateBuilder()
2065 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2066 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2067 .update(vk, device);
2069 // Perform the computation
2071 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
2072 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2073 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
2075 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, inputBufferSizeBytes);
// Transition GENERAL -> TRANSFER_SRC_OPTIMAL before the image-to-buffer copy.
2077 const VkImageMemoryBarrier imagePreCopyBarrier = makeImageMemoryBarrier(
2078 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
2079 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2080 *image, subresourceRange);
2082 const VkBufferMemoryBarrier outputBufferPostCopyBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2084 const VkBufferImageCopy copyParams = makeBufferImageCopy(m_imageSize);
2086 // Prepare the command buffer
2088 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2089 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2091 // Start recording commands
2093 beginCommandBuffer(vk, *cmdBuffer);
2095 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2096 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2098 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2099 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2101 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imagePreCopyBarrier);
// NOTE(review): `©Params` below is a character-encoding corruption of
// `&copyParams` (HTML entity "&copy" mangled into the copyright sign) —
// must be restored to `&copyParams` for this to compile.
2102 vk.cmdCopyImageToBuffer(*cmdBuffer, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, ©Params);
2103 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &outputBufferPostCopyBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2105 endCommandBuffer(vk, *cmdBuffer);
2107 // Wait for completion
2109 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2112 // Validate the results
2114 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2115 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2117 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2118 const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());
// Per pixel: sum of its m_localSize input values, relying on unsigned
// wrap-around matching the shader's uint addition.
2120 for (deUint32 pixelNdx = 0; pixelNdx < imageArea; ++pixelNdx)
2122 const deUint32 res = bufferPtr[pixelNdx];
// NOTE(review): the `deUint32 ref = 0;` declaration and the `if (res != ref)`
// guard (original lines ~2123-2129) are elided from this excerpt.
2125 for (deUint32 offs = 0; offs < m_localSize; ++offs)
2126 ref += refBufferPtr[pixelNdx * m_localSize + offs];
2130 std::ostringstream msg;
2131 msg << "Comparison failed for pixel " << pixelNdx;
2132 return tcu::TestStatus::fail(msg.str());
2135 return tcu::TestStatus::pass("Compute succeeded");
// Test case: verifies that an image memory barrier between two compute
// dispatches makes storage-image writes from the first program visible to
// imageLoad in the second.
// NOTE(review): class braces and access-specifier lines are elided in this
// excerpt.
2138 class ImageBarrierTest : public vkt::TestCase
2141 ImageBarrierTest (tcu::TestContext& testCtx,
2142 const std::string& name,
2143 const std::string& description,
2144 const tcu::IVec2& imageSize);
2146 void initPrograms (SourceCollections& sourceCollections) const;
2147 TestInstance* createInstance (Context& context) const;
// Image dimensions; one work group per texel in both dispatches.
2150 const tcu::IVec2 m_imageSize;
// Per-execution instance for ImageBarrierTest; all work happens in iterate().
// NOTE(review): class braces and access-specifier lines are elided in this
// excerpt.
2153 class ImageBarrierTestInstance : public vkt::TestInstance
2156 ImageBarrierTestInstance (Context& context,
2157 const tcu::IVec2& imageSize);
2159 tcu::TestStatus iterate (void);
2162 const tcu::IVec2 m_imageSize;
2165 ImageBarrierTest::ImageBarrierTest (tcu::TestContext& testCtx,
2166 const std::string& name,
2167 const std::string& description,
2168 const tcu::IVec2& imageSize)
2169 : TestCase (testCtx, name, description)
2170 , m_imageSize (imageSize)
// Builds the two GLSL compute programs: "comp0" stores (offset + u_baseVal)
// into one texel of the r32ui image per work group; "comp1" loads each texel
// back and atomically accumulates it into `sum` in the SSBO at binding 0.
// NOTE(review): several shader-source string lines are elided in this
// excerpt (the "#version 310 es\n" headers, the "};\n" block terminators
// and closing braces) — do not treat the visible strings as the complete
// shader text.
2174 void ImageBarrierTest::initPrograms (SourceCollections& sourceCollections) const
2176 sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
2178 "layout (local_size_x = 1) in;\n"
2179 "layout(binding = 2) readonly uniform Constants {\n"
2180 " uint u_baseVal;\n"
2182 "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_img;\n"
2183 "void main (void) {\n"
// Flattened 3D work-group index: unique value per texel.
2184 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
2185 " imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset + u_baseVal, 0, 0, 0));\n"
2188 sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
2190 "layout (local_size_x = 1) in;\n"
2191 "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_img;\n"
2192 "layout(binding = 0) coherent buffer Output {\n"
2195 "void main (void) {\n"
2196 " uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
// Atomic accumulation keeps the result independent of work-group order.
2197 " atomicAdd(sum, value);\n"
2201 TestInstance* ImageBarrierTest::createInstance (Context& context) const
2203 return new ImageBarrierTestInstance(context, m_imageSize);
2206 ImageBarrierTestInstance::ImageBarrierTestInstance (Context& context, const tcu::IVec2& imageSize)
2207 : TestInstance (context)
2208 , m_imageSize (imageSize)
// Execution for ImageBarrierTest: dispatch "comp0" (writes per-texel values
// into the r32ui image), issue a compute->compute image barrier, dispatch
// "comp1" (atomically sums the texels into the output buffer), then read the
// single-uint sum back and compare against a host-computed reference.
// NOTE(review): this excerpt has original lines elided (standalone braces,
// the access-mask argument line of imageLayoutBarrier at ~2296, the
// `deUint32 ref = 0;` declaration and the `if (res != ref)` guard).
2212 tcu::TestStatus ImageBarrierTestInstance::iterate (void)
2214 const DeviceInterface& vk = m_context.getDeviceInterface();
2215 const VkDevice device = m_context.getDevice();
2216 const VkQueue queue = m_context.getUniversalQueue();
2217 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2218 Allocator& allocator = m_context.getDefaultAllocator();
2220 // Create an image used by both shaders
2222 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_STORAGE_BIT);
2223 const Image image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2225 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2226 const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2228 // Create an output buffer
2230 const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
2231 const Buffer outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2233 // Initialize atomic counter value to zero
2235 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2236 deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2237 *outputBufferPtr = 0;
2238 flushMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2241 // Create a uniform buffer (to pass uniform constants)
2243 const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
2244 const Buffer uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
2246 // Set the constants in the uniform buffer
2248 const deUint32 baseValue = 127;
2250 const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
2251 deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
2252 uniformBufferPtr[0] = baseValue;
2254 flushMappedMemoryRange(vk, device, uniformBufferAllocation.getMemory(), uniformBufferAllocation.getOffset(), uniformBufferSizeBytes);
2257 // Create descriptor set
// Bindings mirror the shader interface: 0 = output SSBO, 1 = storage image,
// 2 = uniform constants.
2259 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2260 DescriptorSetLayoutBuilder()
2261 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2262 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2263 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2264 .build(vk, device));
2266 const Unique<VkDescriptorPool> descriptorPool(
2267 DescriptorPoolBuilder()
2268 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2269 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2270 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2271 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2273 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
// The storage image is bound in VK_IMAGE_LAYOUT_GENERAL.
2275 const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2276 const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
2277 const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2278 DescriptorSetUpdateBuilder()
2279 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
2280 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2281 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2282 .update(vk, device);
2284 // Perform the computation
2286 const Unique<VkShaderModule> shaderModule0(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp0"), 0));
2287 const Unique<VkShaderModule> shaderModule1(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp1"), 0));
2289 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2290 const Unique<VkPipeline> pipeline0(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule0));
2291 const Unique<VkPipeline> pipeline1(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule1));
2293 const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
// Initial transition so the first dispatch can store to the image.
// NOTE(review): the access-mask argument line of this call (original ~2296)
// is elided from this excerpt.
2295 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2297 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
2298 *image, subresourceRange);
// NOTE(review): this barrier transitions the image to
// VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, but comp1 accesses it through a
// STORAGE_IMAGE descriptor whose info (line 2275) declares
// VK_IMAGE_LAYOUT_GENERAL — storage-image access requires GENERAL layout,
// so this should be a GENERAL -> GENERAL barrier. Verify against validation
// layers / upstream dEQP.
2300 const VkImageMemoryBarrier imageBarrierBetweenShaders = makeImageMemoryBarrier(
2301 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
2302 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2303 *image, subresourceRange);
2305 const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2307 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2308 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
2310 // Start recording commands
2312 beginCommandBuffer(vk, *cmdBuffer);
2314 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline0);
2315 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2317 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 1, &imageLayoutBarrier);
2319 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2320 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imageBarrierBetweenShaders);
2322 // Switch to the second shader program
2323 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline1);
2325 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2326 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2328 endCommandBuffer(vk, *cmdBuffer);
2330 // Wait for completion
2332 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2334 // Validate the results
2336 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2337 invalidateMappedMemoryRange(vk, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outputBufferSizeBytes);
2339 const int numValues = multiplyComponents(m_imageSize);
2340 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2341 const deUint32 res = *bufferPtr;
// Reference: sum of (baseValue + texelNdx) over all texels.
// NOTE(review): the `deUint32 ref = 0;` declaration and the `if (res != ref)`
// guard are elided from this excerpt.
2344 for (int ndx = 0; ndx < numValues; ++ndx)
2345 ref += baseValue + ndx;
2349 std::ostringstream msg;
2350 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
2351 return tcu::TestStatus::fail(msg.str());
2353 return tcu::TestStatus::pass("Compute succeeded");
// Verifies that a compute shader with an empty main() can be compiled into a
// pipeline, bound, and dispatched without triggering any device error. There
// is no output buffer; success is simply a clean submit-and-wait.
namespace EmptyShaderTest
// Registers the trivial (do-nothing) compute shader under the name "comp".
void createProgram (SourceCollections& dst)
dst.glslSources.add("comp") << glu::ComputeSource(
"layout (local_size_x = 1) in;\n"
"void main (void) {}\n"
// Builds a compute pipeline from the "comp" shader, records a single
// 1x1x1 dispatch, submits it and waits for completion.
// Returns pass unconditionally once the submission has completed.
tcu::TestStatus createTest (Context& context)
const DeviceInterface& vk = context.getDeviceInterface();
const VkDevice device = context.getDevice();
const VkQueue queue = context.getUniversalQueue();
const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0u));
// The shader uses no descriptors, so the pipeline layout is empty.
const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device));
const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(vk, device, *cmdPool));
// Start recording commands
beginCommandBuffer(vk, *cmdBuffer);
vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
// A single workgroup suffices; the shader performs no work anyway.
const tcu::IVec3 workGroups(1, 1, 1);
vk.cmdDispatch(*cmdBuffer, workGroups.x(), workGroups.y(), workGroups.z());
endCommandBuffer(vk, *cmdBuffer);
// Submit and block until execution finishes; any failure (e.g. device
// lost) surfaces here as a test exception.
submitCommandsAndWait(vk, device, queue, *cmdBuffer);
return tcu::TestStatus::pass("Compute succeeded");
} // EmptyShaderTest ns
//! \brief Creates the "basic" compute test group.
//!
//! Covers buffer-to-buffer copies, in-place SSBO read/write, shared
//! variables and atomics, command/local barriers, and image<->SSBO
//! transfers, each at several local sizes and workgroup counts.
//! Ownership of the returned group is transferred to the caller.
tcu::TestCaseGroup* createBasicComputeShaderTests (tcu::TestContext& testCtx)
de::MovePtr<tcu::TestCaseGroup> basicComputeTests(new tcu::TestCaseGroup(testCtx, "basic", "Basic compute tests"));
// Smoke test: an empty shader must build and dispatch cleanly.
addFunctionCaseWithPrograms(basicComputeTests.get(), "empty_shader", "Shader that does nothing", EmptyShaderTest::createProgram, EmptyShaderTest::createTest);
// UBO -> SSBO copies, varying local size and workgroup count.
basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_single_invocation", "Copy from UBO to SSBO, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_single_group", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(2,1,4), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_multiple_invocations", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1)));
basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_multiple_groups", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
// SSBO -> SSBO copies.
basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_single_invocation", "Copy between SSBOs, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_multiple_invocations", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1)));
basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_multiple_groups", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
// Read-modify-write of a single SSBO, with both sized and unsized arrays.
basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_single_invocation", "Read and write same SSBO", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_multiple_groups", "Read and write same SSBO", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_single_invocation", "Read and write same SSBO", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_multiple_groups", "Read and write same SSBO", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
// Writing to several SSBO bindings from the same shader.
basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_single_invocation", "Write to multiple SSBOs", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_multiple_groups", "Write to multiple SSBOs", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_single_invocation", "Write to multiple SSBOs", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_multiple_groups", "Write to multiple SSBOs", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4)));
// Workgroup-local barriers protecting SSBO accesses.
basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_invocation", "SSBO local barrier usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_group", "SSBO local barrier usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_multiple_groups", "SSBO local barrier usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3)));
// Command-buffer-level memory barriers between dispatches.
basicComputeTests->addChild(new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_single", "SSBO memory barrier usage", tcu::IVec3(1,1,1)));
basicComputeTests->addChild(new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_multiple", "SSBO memory barrier usage", tcu::IVec3(11,5,7)));
// Shared variables across invocations of a workgroup.
basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_invocation", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_group", "Basic shared variable usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_invocations", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4)));
basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_groups", "Basic shared variable usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3)));
// Atomic operations on shared variables.
basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_invocation", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_group", "Atomic operation with shared var", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1)));
basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_invocations", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4)));
basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_groups", "Atomic operation with shared var", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3)));
// Image <-> SSBO transfers in both directions.
basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_small", "Image to SSBO copy", tcu::IVec2(1,1), tcu::IVec2(64,64)));
basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_large", "Image to SSBO copy", tcu::IVec2(2,4), tcu::IVec2(512,512)));
basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_small", "SSBO to image copy", tcu::IVec2(1, 1), tcu::IVec2(64, 64)));
basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_large", "SSBO to image copy", tcu::IVec2(2, 4), tcu::IVec2(512, 512)));
// Atomic image operations at two local sizes.
basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_1", "Atomic operation with image", 1, tcu::IVec2(64,64)));
basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_8", "Atomic operation with image", 8, tcu::IVec2(64,64)));
// Image memory barriers between dependent dispatches.
basicComputeTests->addChild(new ImageBarrierTest(testCtx, "image_barrier_single", "Image barrier", tcu::IVec2(1,1)));
basicComputeTests->addChild(new ImageBarrierTest(testCtx, "image_barrier_multiple", "Image barrier", tcu::IVec2(64,64)));
return basicComputeTests.release();