1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2016 The Khronos Group Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 * \file vktSparseResourcesBufferSparseResidency.cpp
21 * \brief Sparse partially resident buffers tests
22 *//*--------------------------------------------------------------------*/
24 #include "vktSparseResourcesBufferSparseResidency.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vkMemUtil.hpp"
36 #include "vkQueryUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkTypeUtil.hpp"
40 #include "deStringUtil.hpp"
41 #include "deUniquePtr.hpp"
57 SIZE_OF_UINT_IN_SHADER = 4u,
60 class BufferSparseResidencyCase : public TestCase
63 BufferSparseResidencyCase (tcu::TestContext& testCtx,
64 const std::string& name,
65 const std::string& description,
66 const deUint32 bufferSize,
67 const glu::GLSLVersion glslVersion);
69 void initPrograms (SourceCollections& sourceCollections) const;
70 TestInstance* createInstance (Context& context) const;
73 const deUint32 m_bufferSize;
74 const glu::GLSLVersion m_glslVersion;
77 BufferSparseResidencyCase::BufferSparseResidencyCase (tcu::TestContext& testCtx,
78 const std::string& name,
79 const std::string& description,
80 const deUint32 bufferSize,
81 const glu::GLSLVersion glslVersion)
82 : TestCase (testCtx, name, description)
83 , m_bufferSize (bufferSize)
84 , m_glslVersion (glslVersion)
88 void BufferSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const
90 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
91 const deUint32 iterationsCount = m_bufferSize / SIZE_OF_UINT_IN_SHADER;
93 std::ostringstream src;
95 src << versionDecl << "\n"
96 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
97 << "layout(set = 0, binding = 0, std430) readonly buffer Input\n"
102 << "layout(set = 0, binding = 1, std430) writeonly buffer Output\n"
104 << " uint result[];\n"
107 << "void main (void)\n"
109 << " for(int i=0; i<" << iterationsCount << "; ++i) \n"
111 << " sb_out.result[i] = sb_in.data[i];"
115 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
118 class BufferSparseResidencyInstance : public SparseResourcesBaseInstance
121 BufferSparseResidencyInstance (Context& context,
122 const deUint32 bufferSize);
124 tcu::TestStatus iterate (void);
127 const deUint32 m_bufferSize;
130 BufferSparseResidencyInstance::BufferSparseResidencyInstance (Context& context,
131 const deUint32 bufferSize)
132 : SparseResourcesBaseInstance (context)
133 , m_bufferSize (bufferSize)
137 tcu::TestStatus BufferSparseResidencyInstance::iterate (void)
139 const InstanceInterface& instance = m_context.getInstanceInterface();
140 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
141 const VkPhysicalDeviceProperties physicalDeviceProperties = getPhysicalDeviceProperties(instance, physicalDevice);
143 if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyBuffer)
144 TCU_THROW(NotSupportedError, "Sparse partially resident buffers not supported");
147 // Create logical device supporting both sparse and compute operations
148 QueueRequirementsVec queueRequirements;
149 queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
150 queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
152 createDeviceSupportingQueues(queueRequirements);
155 const DeviceInterface& deviceInterface = getDeviceInterface();
156 const Queue& sparseQueue = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
157 const Queue& computeQueue = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
159 VkBufferCreateInfo bufferCreateInfo =
161 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
162 DE_NULL, // const void* pNext;
163 VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
164 VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT, // VkBufferCreateFlags flags;
165 m_bufferSize, // VkDeviceSize size;
166 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
167 VK_BUFFER_USAGE_TRANSFER_SRC_BIT, // VkBufferUsageFlags usage;
168 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
169 0u, // deUint32 queueFamilyIndexCount;
170 DE_NULL // const deUint32* pQueueFamilyIndices;
173 const deUint32 queueFamilyIndices[] = { sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex };
175 if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
177 bufferCreateInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
178 bufferCreateInfo.queueFamilyIndexCount = 2u;
179 bufferCreateInfo.pQueueFamilyIndices = queueFamilyIndices;
182 // Create sparse buffer
183 const Unique<VkBuffer> sparseBuffer(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));
185 // Create sparse buffer memory bind semaphore
186 const Unique<VkSemaphore> bufferMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));
188 const VkMemoryRequirements bufferMemRequirements = getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBuffer);
190 if (bufferMemRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
191 TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits");
193 DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);
195 const deUint32 numSparseSlots = static_cast<deUint32>(bufferMemRequirements.size / bufferMemRequirements.alignment);
196 std::vector<DeviceMemorySp> deviceMemUniquePtrVec;
199 std::vector<VkSparseMemoryBind> sparseMemoryBinds;
200 const deUint32 memoryType = findMatchingMemoryType(instance, physicalDevice, bufferMemRequirements, MemoryRequirement::Any);
202 if (memoryType == NO_MATCH_FOUND)
203 return tcu::TestStatus::fail("No matching memory type found");
205 for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; sparseBindNdx += 2)
207 const VkSparseMemoryBind sparseMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.alignment, memoryType, bufferMemRequirements.alignment * sparseBindNdx);
209 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(sparseMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
211 sparseMemoryBinds.push_back(sparseMemoryBind);
214 const VkSparseBufferMemoryBindInfo sparseBufferBindInfo = makeSparseBufferMemoryBindInfo(*sparseBuffer, static_cast<deUint32>(sparseMemoryBinds.size()), &sparseMemoryBinds[0]);
216 const VkBindSparseInfo bindSparseInfo =
218 VK_STRUCTURE_TYPE_BIND_SPARSE_INFO, //VkStructureType sType;
219 DE_NULL, //const void* pNext;
220 0u, //deUint32 waitSemaphoreCount;
221 DE_NULL, //const VkSemaphore* pWaitSemaphores;
222 1u, //deUint32 bufferBindCount;
223 &sparseBufferBindInfo, //const VkSparseBufferMemoryBindInfo* pBufferBinds;
224 0u, //deUint32 imageOpaqueBindCount;
225 DE_NULL, //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
226 0u, //deUint32 imageBindCount;
227 DE_NULL, //const VkSparseImageMemoryBindInfo* pImageBinds;
228 1u, //deUint32 signalSemaphoreCount;
229 &bufferMemoryBindSemaphore.get() //const VkSemaphore* pSignalSemaphores;
232 VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
235 // Create input buffer
236 const VkBufferCreateInfo inputBufferCreateInfo = makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
237 const Unique<VkBuffer> inputBuffer (createBuffer(deviceInterface, getDevice(), &inputBufferCreateInfo));
238 const de::UniquePtr<Allocation> inputBufferAlloc (bindBuffer(deviceInterface, getDevice(), getAllocator(), *inputBuffer, MemoryRequirement::HostVisible));
241 std::vector<deUint8> referenceData;
242 referenceData.resize(m_bufferSize);
244 for (deUint32 valueNdx = 0; valueNdx < m_bufferSize; ++valueNdx)
246 referenceData[valueNdx] = static_cast<deUint8>((valueNdx % bufferMemRequirements.alignment) + 1u);
249 deMemcpy(inputBufferAlloc->getHostPtr(), &referenceData[0], m_bufferSize);
251 flushMappedMemoryRange(deviceInterface, getDevice(), inputBufferAlloc->getMemory(), inputBufferAlloc->getOffset(), m_bufferSize);
253 // Create output buffer
254 const VkBufferCreateInfo outputBufferCreateInfo = makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
255 const Unique<VkBuffer> outputBuffer (createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
256 const de::UniquePtr<Allocation> outputBufferAlloc (bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
258 // Create command buffer for compute and data transfer oparations
259 const Unique<VkCommandPool> commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
260 const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
262 // Start recording compute and transfer commands
263 beginCommandBuffer(deviceInterface, *commandBuffer);
265 // Create descriptor set
266 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
267 DescriptorSetLayoutBuilder()
268 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
269 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
270 .build(deviceInterface, getDevice()));
272 // Create compute pipeline
273 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
274 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
275 const Unique<VkPipeline> computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
277 deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
279 const Unique<VkDescriptorPool> descriptorPool(
280 DescriptorPoolBuilder()
281 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
282 .build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
284 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
287 const VkDescriptorBufferInfo inputBufferInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, m_bufferSize);
288 const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(*sparseBuffer, 0ull, m_bufferSize);
290 DescriptorSetUpdateBuilder()
291 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputBufferInfo)
292 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
293 .update(deviceInterface, getDevice());
296 deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
299 const VkBufferMemoryBarrier inputBufferBarrier
300 = makeBufferMemoryBarrier( VK_ACCESS_HOST_WRITE_BIT,
301 VK_ACCESS_SHADER_READ_BIT,
306 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 1u, &inputBufferBarrier, 0u, DE_NULL);
309 deviceInterface.cmdDispatch(*commandBuffer, 1u, 1u, 1u);
312 const VkBufferMemoryBarrier sparseBufferBarrier
313 = makeBufferMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
314 VK_ACCESS_TRANSFER_READ_BIT,
319 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &sparseBufferBarrier, 0u, DE_NULL);
323 const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSize);
325 deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBuffer, *outputBuffer, 1u, &bufferCopy);
329 const VkBufferMemoryBarrier outputBufferBarrier
330 = makeBufferMemoryBarrier( VK_ACCESS_TRANSFER_WRITE_BIT,
331 VK_ACCESS_HOST_READ_BIT,
336 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier, 0u, DE_NULL);
339 // End recording compute and transfer commands
340 endCommandBuffer(deviceInterface, *commandBuffer);
342 const VkPipelineStageFlags waitStageBits[] = { VK_PIPELINE_STAGE_TRANSFER_BIT };
344 // Submit transfer commands for execution and wait for completion
345 submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &bufferMemoryBindSemaphore.get(), waitStageBits);
347 // Retrieve data from output buffer to host memory
348 invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), m_bufferSize);
350 const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
352 // Wait for sparse queue to become idle
353 deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
355 // Compare output data with reference data
356 for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; ++sparseBindNdx)
358 const deUint32 alignment = static_cast<deUint32>(bufferMemRequirements.alignment);
359 const deUint32 offset = alignment * sparseBindNdx;
360 const deUint32 size = sparseBindNdx == (numSparseSlots - 1) ? m_bufferSize % alignment : alignment;
362 if (sparseBindNdx % 2u == 0u)
364 if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
365 return tcu::TestStatus::fail("Failed");
367 else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
369 deMemset(&referenceData[offset], 0u, size);
371 if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
372 return tcu::TestStatus::fail("Failed");
376 return tcu::TestStatus::pass("Passed");
379 TestInstance* BufferSparseResidencyCase::createInstance (Context& context) const
381 return new BufferSparseResidencyInstance(context, m_bufferSize);
386 void addBufferSparseResidencyTests(tcu::TestCaseGroup* group)
388 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_10", "", 1 << 10, glu::GLSL_VERSION_440));
389 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_12", "", 1 << 12, glu::GLSL_VERSION_440));
390 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_16", "", 1 << 16, glu::GLSL_VERSION_440));
391 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_17", "", 1 << 17, glu::GLSL_VERSION_440));
392 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_20", "", 1 << 20, glu::GLSL_VERSION_440));
393 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_24", "", 1 << 24, glu::GLSL_VERSION_440));