1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2016 The Khronos Group Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 * \file vktSparseResourcesBufferMemoryAliasing.cpp
21 * \brief Sparse buffer memory aliasing tests
22 *//*--------------------------------------------------------------------*/
24 #include "vktSparseResourcesBufferMemoryAliasing.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vkMemUtil.hpp"
36 #include "vkQueryUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkTypeUtil.hpp"
40 #include "deStringUtil.hpp"
41 #include "deUniquePtr.hpp"
// Size in bytes of one element of the shader's uint result[] array; the buffer size in bytes is divided by it to obtain the number of shader invocations.
57 SIZE_OF_UINT_IN_SHADER = 4u,
// The compute shader stores (index % MODULO_DIVISOR) into element 'index', and the host-side reference data repeats with the same period.
58 MODULO_DIVISOR = 1024u
61 tcu::UVec3 computeWorkGroupSize (const deUint32 numInvocations)
63 const deUint32 maxComputeWorkGroupInvocations = 128u;
64 const tcu::UVec3 maxComputeWorkGroupSize = tcu::UVec3(128u, 128u, 64u);
65 deUint32 numInvocationsLeft = numInvocations;
67 const deUint32 xWorkGroupSize = std::min(std::min(numInvocationsLeft, maxComputeWorkGroupSize.x()), maxComputeWorkGroupInvocations);
68 numInvocationsLeft = numInvocationsLeft / xWorkGroupSize + ((numInvocationsLeft % xWorkGroupSize) ? 1u : 0u);
70 const deUint32 yWorkGroupSize = std::min(std::min(numInvocationsLeft, maxComputeWorkGroupSize.y()), maxComputeWorkGroupInvocations / xWorkGroupSize);
71 numInvocationsLeft = numInvocationsLeft / yWorkGroupSize + ((numInvocationsLeft % yWorkGroupSize) ? 1u : 0u);
73 const deUint32 zWorkGroupSize = std::min(std::min(numInvocationsLeft, maxComputeWorkGroupSize.z()), maxComputeWorkGroupInvocations / (xWorkGroupSize*yWorkGroupSize));
74 numInvocationsLeft = numInvocationsLeft / zWorkGroupSize + ((numInvocationsLeft % zWorkGroupSize) ? 1u : 0u);
76 return tcu::UVec3(xWorkGroupSize, yWorkGroupSize, zWorkGroupSize);
// Test case for sparse buffer memory aliasing with a given buffer size.
// It generates the compute shader used by the test and creates the instance that executes it.
79 class BufferSparseMemoryAliasingCase : public TestCase
// Stores the buffer size (in bytes) and the GLSL version; testCtx, name and description are forwarded to TestCase.
82 BufferSparseMemoryAliasingCase (tcu::TestContext& testCtx,
83 const std::string& name,
84 const std::string& description,
85 const deUint32 bufferSize,
86 const glu::GLSLVersion glslVersion);
// Adds the compute shader source named "comp" to sourceCollections.
88 void initPrograms (SourceCollections& sourceCollections) const;
// Returns a newly allocated BufferSparseMemoryAliasingInstance for this buffer size.
89 TestInstance* createInstance (Context& context) const;
// Size in bytes of the sparse buffers under test.
92 const deUint32 m_bufferSizeInBytes;
// GLSL version whose declaration is emitted at the top of the shader.
93 const glu::GLSLVersion m_glslVersion;
// Constructs the test case: forwards testCtx, name and description to the TestCase base class and
// remembers the buffer size in bytes and the GLSL version for initPrograms() and createInstance().
96 BufferSparseMemoryAliasingCase::BufferSparseMemoryAliasingCase (tcu::TestContext& testCtx,
97 const std::string& name,
98 const std::string& description,
99 const deUint32 bufferSize,
100 const glu::GLSLVersion glslVersion)
101 : TestCase (testCtx, name, description)
102 , m_bufferSizeInBytes (bufferSize)
103 , m_glslVersion (glslVersion)
// Builds the GLSL compute shader for this buffer size and registers it under the name "comp".
// Each invocation computes a linear index from its global invocation ID and, if the index is within
// the buffer, writes (index % MODULO_DIVISOR) to result[index].
107 void BufferSparseMemoryAliasingCase::initPrograms (SourceCollections& sourceCollections) const
109 // Create compute program
110 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
// One invocation per uint element of the buffer.
111 const deUint32 numInvocations = m_bufferSizeInBytes / SIZE_OF_UINT_IN_SHADER;
// The local size is baked into the shader text; iterate() calls computeWorkGroupSize() with the same count.
112 const tcu::UVec3 workGroupSize = computeWorkGroupSize(numInvocations);
114 std::ostringstream src;
115 src << versionDecl << "\n"
116 << "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y() << ", local_size_z = " << workGroupSize.z() << ") in;\n"
// Output storage buffer at set 0, binding 0 (write-only, std430 layout).
117 << "layout(set = 0, binding = 0, std430) writeonly buffer Output\n"
119 << " uint result[];\n"
122 << "void main (void)\n"
// Flatten the 3D global invocation ID into a linear index, using the total grid extent in x and y as strides.
124 << " uint index = gl_GlobalInvocationID.x + (gl_GlobalInvocationID.y + gl_GlobalInvocationID.z*gl_NumWorkGroups.y*gl_WorkGroupSize.y)*gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
// Bounds check: the dispatch may launch more invocations than there are elements.
125 << " if ( index < " << m_bufferSizeInBytes / SIZE_OF_UINT_IN_SHADER << "u )\n"
127 << " sb_out.result[index] = index % " << MODULO_DIVISOR << "u;\n"
131 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
// Test instance that executes the sparse buffer memory aliasing test for one buffer size.
134 class BufferSparseMemoryAliasingInstance : public SparseResourcesBaseInstance
// Stores the buffer size in bytes; context is forwarded to SparseResourcesBaseInstance.
137 BufferSparseMemoryAliasingInstance (Context& context,
138 const deUint32 bufferSize);
// Runs the whole test in a single call and returns its status.
140 tcu::TestStatus iterate (void);
// Size in bytes of the sparse buffers and of the output buffer.
143 const deUint32 m_bufferSizeInBytes;
// Constructs the instance: forwards context to SparseResourcesBaseInstance and stores the buffer size in bytes.
146 BufferSparseMemoryAliasingInstance::BufferSparseMemoryAliasingInstance (Context& context,
147 const deUint32 bufferSize)
148 : SparseResourcesBaseInstance (context)
149 , m_bufferSizeInBytes (bufferSize)
// Executes the sparse buffer memory aliasing test.
//
// Two sparse buffers are created from the same create info and both are bound to the same VkSparseMemoryBind.
// A compute shader writes (index % MODULO_DIVISOR) through sparseBufferWrite, the contents are then copied out
// through sparseBufferRead into a host-visible output buffer and compared byte-for-byte with reference data.
//
// Returns pass when the copied data matches the reference; fail on a mismatch or when no memory type matches.
// Throws NotSupportedError when sparse binding or sparse aliased residency is unavailable, when the required
// memory size exceeds sparseAddressSpaceSize, or when the dispatch size computation does not reduce to 1.
153 tcu::TestStatus BufferSparseMemoryAliasingInstance::iterate (void)
155 const InstanceInterface& instance = m_context.getInstanceInterface();
156 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
// NOTE(review): the device features are queried twice below; a single query stored in a local would do.
158 if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseBinding)
159 TCU_THROW(NotSupportedError, "Sparse binding not supported");
161 if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyAliased)
162 TCU_THROW(NotSupportedError, "Sparse memory aliasing not supported");
165 // Create logical device supporting both sparse and compute operations
166 QueueRequirementsVec queueRequirements;
167 queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
168 queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
170 createDeviceSupportingQueues(queueRequirements);
173 const DeviceInterface& deviceInterface = getDeviceInterface();
174 const Queue& sparseQueue = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
175 const Queue& computeQueue = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
// Create info shared by both sparse buffers; not const because the sharing mode may be patched below.
177 VkBufferCreateInfo bufferCreateInfo =
179 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
180 DE_NULL, // const void* pNext;
181 VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
182 VK_BUFFER_CREATE_SPARSE_ALIASED_BIT, // VkBufferCreateFlags flags;
183 m_bufferSizeInBytes, // VkDeviceSize size;
184 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
185 VK_BUFFER_USAGE_TRANSFER_SRC_BIT, // VkBufferUsageFlags usage;
186 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
187 0u, // deUint32 queueFamilyIndexCount;
188 DE_NULL // const deUint32* pQueueFamilyIndices;
// Declared outside the if so the array is still alive when bufferCreateInfo is used to create the buffers.
191 const deUint32 queueFamilyIndices[] = { sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex };
// When the sparse and compute queues come from different families, share the buffers concurrently between both.
193 if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
195 bufferCreateInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
196 bufferCreateInfo.queueFamilyIndexCount = 2u;
197 bufferCreateInfo.pQueueFamilyIndices = queueFamilyIndices;
200 // Create sparse buffers
201 const Unique<VkBuffer> sparseBufferWrite(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));
202 const Unique<VkBuffer> sparseBufferRead (createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));
204 // Create sparse buffers memory bind semaphore
205 const Unique<VkSemaphore> bufferMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));
// Memory requirements are queried from sparseBufferWrite only; sparseBufferRead was created from the same create info.
207 const VkMemoryRequirements bufferMemRequirements = getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBufferWrite);
209 if (bufferMemRequirements.size > getPhysicalDeviceProperties(instance, physicalDevice).limits.sparseAddressSpaceSize)
210 TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits");
212 DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);
214 const deUint32 memoryType = findMatchingMemoryType(instance, physicalDevice, bufferMemRequirements, MemoryRequirement::Any);
216 if (memoryType == NO_MATCH_FOUND)
217 return tcu::TestStatus::fail("No matching memory type found");
// makeSparseMemoryBind is defined elsewhere; presumably it allocates bufferMemRequirements.size bytes of the chosen memory type - confirm.
219 const VkSparseMemoryBind sparseMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.size, memoryType, 0u);
// Hand the memory handle from the bind structure to a Move<> with a Deleter so that this scope owns it.
221 Move<VkDeviceMemory> deviceMemoryPtr(check<VkDeviceMemory>(sparseMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL));
// Both buffers are bound with the very same VkSparseMemoryBind, i.e. they alias the same memory range.
224 const VkSparseBufferMemoryBindInfo sparseBufferMemoryBindInfo[2] =
226 makeSparseBufferMemoryBindInfo
227 (*sparseBufferWrite, //VkBuffer buffer;
228 1u, //deUint32 bindCount;
229 &sparseMemoryBind //const VkSparseMemoryBind* pBinds;
232 makeSparseBufferMemoryBindInfo
233 (*sparseBufferRead, //VkBuffer buffer;
234 1u, //deUint32 bindCount;
235 &sparseMemoryBind //const VkSparseMemoryBind* pBinds;
// Single bind batch: no wait semaphores, two buffer binds, signals bufferMemoryBindSemaphore when done.
239 const VkBindSparseInfo bindSparseInfo =
241 VK_STRUCTURE_TYPE_BIND_SPARSE_INFO, //VkStructureType sType;
242 DE_NULL, //const void* pNext;
243 0u, //deUint32 waitSemaphoreCount;
244 DE_NULL, //const VkSemaphore* pWaitSemaphores;
245 2u, //deUint32 bufferBindCount;
246 sparseBufferMemoryBindInfo, //const VkSparseBufferMemoryBindInfo* pBufferBinds;
247 0u, //deUint32 imageOpaqueBindCount;
248 DE_NULL, //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
249 0u, //deUint32 imageBindCount;
250 DE_NULL, //const VkSparseImageMemoryBindInfo* pImageBinds;
251 1u, //deUint32 signalSemaphoreCount;
252 &bufferMemoryBindSemaphore.get() //const VkSemaphore* pSignalSemaphores;
255 // Submit sparse bind commands for execution
256 VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
259 // Create output buffer
260 const VkBufferCreateInfo outputBufferCreateInfo = makeBufferCreateInfo(m_bufferSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
261 const Unique<VkBuffer> outputBuffer (createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
262 const de::UniquePtr<Allocation> outputBufferAlloc (bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
264 // Create command buffer for compute and data transfer operations
265 const Unique<VkCommandPool> commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
266 const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
268 // Start recording commands
269 beginCommandBuffer(deviceInterface, *commandBuffer);
271 // Create descriptor set layout: a single storage buffer binding visible to the compute stage
272 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
273 DescriptorSetLayoutBuilder()
274 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
275 .build(deviceInterface, getDevice()));
277 // Create compute pipeline
278 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
279 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
280 const Unique<VkPipeline> computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
282 deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
284 // Create descriptor pool and allocate the descriptor set from it
285 const Unique<VkDescriptorPool> descriptorPool(
286 DescriptorPoolBuilder()
287 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
288 .build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
290 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
// The shader writes through sparseBufferWrite, covering m_bufferSizeInBytes bytes from offset 0.
293 const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(*sparseBufferWrite, 0u, m_bufferSizeInBytes);
295 DescriptorSetUpdateBuilder()
296 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
297 .update(deviceInterface, getDevice());
300 deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
// Work out how many work groups to dispatch, capped at 65535 per dimension.
303 deUint32 numInvocationsLeft = m_bufferSizeInBytes / SIZE_OF_UINT_IN_SHADER;
304 const tcu::UVec3 workGroupSize = computeWorkGroupSize(numInvocationsLeft);
305 const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);
// NOTE(review): this subtracts the size of ONE work group from the invocation count, and the remainder is then
// used as a work group COUNT. A rounded-up division by the work group size looks like what was intended - confirm.
// As written, far more groups than necessary are dispatched (the shader's index bounds check discards the extras),
// and a buffer with no more invocations than one work group would make xWorkGroupCount zero and divide by zero below.
// The sizes registered in this file (at least 1 << 10 bytes, i.e. 256 invocations) stay above the 128-invocation group size.
307 numInvocationsLeft -= workGroupSize.x()*workGroupSize.y()*workGroupSize.z();
309 const deUint32 xWorkGroupCount = std::min(numInvocationsLeft, maxComputeWorkGroupCount.x());
310 numInvocationsLeft = numInvocationsLeft / xWorkGroupCount + ((numInvocationsLeft % xWorkGroupCount) ? 1u : 0u);
311 const deUint32 yWorkGroupCount = std::min(numInvocationsLeft, maxComputeWorkGroupCount.y());
312 numInvocationsLeft = numInvocationsLeft / yWorkGroupCount + ((numInvocationsLeft % yWorkGroupCount) ? 1u : 0u);
313 const deUint32 zWorkGroupCount = std::min(numInvocationsLeft, maxComputeWorkGroupCount.z());
314 numInvocationsLeft = numInvocationsLeft / zWorkGroupCount + ((numInvocationsLeft % zWorkGroupCount) ? 1u : 0u);
// Anything other than 1 here means the request did not fit into the three capped dimensions.
316 if (numInvocationsLeft != 1u)
317 TCU_THROW(NotSupportedError, "Buffer size is not supported");
319 deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
// Buffer barrier from shader writes to transfer reads, placed between the dispatch and the copy.
323 const VkBufferMemoryBarrier sparseBufferWriteBarrier
324 = makeBufferMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
325 VK_ACCESS_TRANSFER_READ_BIT,
328 m_bufferSizeInBytes);
330 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &sparseBufferWriteBarrier, 0u, DE_NULL);
// Copy the whole range out through sparseBufferRead - the alias, not the buffer the shader wrote through.
334 const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSizeInBytes);
336 deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBufferRead, *outputBuffer, 1u, &bufferCopy);
// Buffer barrier from transfer writes to host reads, placed after the copy.
340 const VkBufferMemoryBarrier outputBufferHostBarrier
341 = makeBufferMemoryBarrier( VK_ACCESS_TRANSFER_WRITE_BIT,
342 VK_ACCESS_HOST_READ_BIT,
345 m_bufferSizeInBytes);
347 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferHostBarrier, 0u, DE_NULL);
350 // End recording commands
351 endCommandBuffer(deviceInterface, *commandBuffer);
353 // The stage at which execution is going to wait for finish of sparse binding operations
354 const VkPipelineStageFlags waitStageBits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
356 // Submit commands for execution and wait for completion
357 submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &bufferMemoryBindSemaphore.get(), waitStageBits);
359 // Retrieve data from output buffer to host memory
360 invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), m_bufferSizeInBytes);
362 const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
364 // Wait for sparse queue to become idle
// NOTE(review): the result of queueWaitIdle is ignored here, unlike queueBindSparse above which is wrapped in VK_CHECK.
365 deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
367 // Prepare reference data
368 std::vector<deUint8> referenceData;
369 referenceData.resize(m_bufferSizeInBytes);
// One period of the expected pattern: the values 0 .. MODULO_DIVISOR-1 as 32-bit uints.
371 std::vector<deUint32> referenceDataBlock;
372 referenceDataBlock.resize(MODULO_DIVISOR);
374 for (deUint32 valueNdx = 0; valueNdx < MODULO_DIVISOR; ++valueNdx)
376 referenceDataBlock[valueNdx] = valueNdx % MODULO_DIVISOR;
// Tile the period across the reference buffer; a trailing partial block is only needed when the size is not a multiple of the period.
379 const deUint32 fullBlockSizeInBytes = MODULO_DIVISOR * SIZE_OF_UINT_IN_SHADER;
380 const deUint32 lastBlockSizeInBytes = m_bufferSizeInBytes % fullBlockSizeInBytes;
381 const deUint32 numberOfBlocks = m_bufferSizeInBytes / fullBlockSizeInBytes + (lastBlockSizeInBytes ? 1u : 0u);
383 for (deUint32 blockNdx = 0; blockNdx < numberOfBlocks; ++blockNdx)
385 const deUint32 offset = blockNdx * fullBlockSizeInBytes;
// Copy a full block while it fits, otherwise only the remaining tail bytes.
386 deMemcpy(&referenceData[0] + offset, &referenceDataBlock[0], ((offset + fullBlockSizeInBytes) <= m_bufferSizeInBytes) ? fullBlockSizeInBytes : lastBlockSizeInBytes);
389 // Compare reference data with output data
390 if (deMemCmp(&referenceData[0], outputData, m_bufferSizeInBytes) != 0)
391 return tcu::TestStatus::fail("Failed");
393 return tcu::TestStatus::pass("Passed");
// Creates the test instance for this case, passing on the buffer size in bytes.
// The instance is allocated with new and returned as a raw pointer.
396 TestInstance* BufferSparseMemoryAliasingCase::createInstance (Context& context) const
398 return new BufferSparseMemoryAliasingInstance(context, m_bufferSizeInBytes);
// Registers the buffer memory aliasing cases in the given group.
// One case is added per buffer size (2^10, 2^12, 2^16, 2^17, 2^20 and 2^24 bytes), each named "buffer_size_2_<exponent>",
// with an empty description and GLSL version 440.
403 void addBufferSparseMemoryAliasingTests(tcu::TestCaseGroup* group)
405 group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_10", "", 1 << 10, glu::GLSL_VERSION_440));
406 group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_12", "", 1 << 12, glu::GLSL_VERSION_440));
407 group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_16", "", 1 << 16, glu::GLSL_VERSION_440));
408 group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_17", "", 1 << 17, glu::GLSL_VERSION_440));
409 group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_20", "", 1 << 20, glu::GLSL_VERSION_440));
410 group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_24", "", 1 << 24, glu::GLSL_VERSION_440));