1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2016 The Khronos Group Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 * \file vktSparseResourcesImageSparseResidency.cpp
21 * \brief Sparse partially resident images tests
22 *//*--------------------------------------------------------------------*/
24 #include "vktSparseResourcesBufferSparseBinding.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkMemUtil.hpp"
35 #include "vkBuilderUtil.hpp"
36 #include "vkImageUtil.hpp"
37 #include "vkQueryUtil.hpp"
38 #include "vkTypeUtil.hpp"
40 #include "deUniquePtr.hpp"
41 #include "deStringUtil.hpp"
// Builds the GLSL coordinate expression used to address a texel of an image of the given type.
// The visible branches assemble either an "ivec2(x,y)" or an "ivec3(x,y,z)" string from the
// caller-supplied component expressions.
// NOTE(review): the remaining parameter declarations, several case labels and at least one
// return are not visible in this excerpt, so the full type-to-expression mapping cannot be
// stated here.
55 const std::string getCoordStr (const ImageType imageType,
63 case IMAGE_TYPE_BUFFER:
66 case IMAGE_TYPE_1D_ARRAY:
// Two-component integer coordinate.
68 return "ivec2(" + x + "," + y + ")";
70 case IMAGE_TYPE_2D_ARRAY:
73 case IMAGE_TYPE_CUBE_ARRAY:
// Three-component integer coordinate.
74 return "ivec3(" + x + "," + y + "," + z + ")";
// Component-wise rounded-up division of extent by divisor: each quotient is increased by one
// when the corresponding remainder is non-zero.
// NOTE(review): no guard against a zero divisor component is visible here; such an input would
// divide by zero.
82 tcu::UVec3 alignedDivide (const VkExtent3D& extent, const VkExtent3D& divisor)
// width -> x, height -> y, depth -> z
86 result.x() = extent.width / divisor.width + ((extent.width % divisor.width) ? 1u : 0u);
87 result.y() = extent.height / divisor.height + ((extent.height % divisor.height) ? 1u : 0u);
88 result.z() = extent.depth / divisor.depth + ((extent.depth % divisor.depth) ? 1u : 0u);
// Chooses a compute work-group size for the given grid.
// Each axis is clamped to the grid size and to a per-axis maximum; y and z are additionally
// limited by the invocation budget left over from the preceding axes, so the product of the
// three components never exceeds maxComputeWorkGroupInvocations.
// The limits are literal constants in this function rather than values read from the device.
// NOTE(review): a grid component of 0 makes xWorkGroupSize (or the x*y product) zero, which is
// then used as a divisor below.
93 tcu::UVec3 computeWorkGroupSize (const tcu::UVec3& gridSize)
95 const deUint32 maxComputeWorkGroupInvocations = 128u;
96 const tcu::UVec3 maxComputeWorkGroupSize = tcu::UVec3(128u, 128u, 64u);
// x takes as much of the invocation budget as the grid and the per-axis limit allow.
98 const deUint32 xWorkGroupSize = std::min(std::min(gridSize.x(), maxComputeWorkGroupSize.x()), maxComputeWorkGroupInvocations);
// y and z share whatever budget remains (integer division).
99 const deUint32 yWorkGroupSize = std::min(std::min(gridSize.y(), maxComputeWorkGroupSize.y()), maxComputeWorkGroupInvocations / xWorkGroupSize);
100 const deUint32 zWorkGroupSize = std::min(std::min(gridSize.z(), maxComputeWorkGroupSize.z()), maxComputeWorkGroupInvocations / (xWorkGroupSize*yWorkGroupSize));
102 return tcu::UVec3(xWorkGroupSize, yWorkGroupSize, zWorkGroupSize);
// Test case describing one sparse-residency image configuration (image type, size, format and
// the GLSL version used for the generated compute shader). It declares the shader generation
// hook (initPrograms) and the factory for the runtime instance (createInstance).
105 class ImageSparseResidencyCase : public TestCase
108 ImageSparseResidencyCase (tcu::TestContext& testCtx,
109 const std::string& name,
110 const std::string& description,
111 const ImageType imageType,
112 const tcu::UVec3& imageSize,
113 const tcu::TextureFormat& format,
114 const glu::GLSLVersion glslVersion);
// Adds the compute shader source for this configuration to sourceCollections.
116 void initPrograms (SourceCollections& sourceCollections) const;
// Creates the test instance that executes this case.
117 TestInstance* createInstance (Context& context) const;
// Immutable test parameters captured at construction.
120 const ImageType m_imageType;
121 const tcu::UVec3 m_imageSize;
122 const tcu::TextureFormat m_format;
123 const glu::GLSLVersion m_glslVersion;
// Constructs the test case: testCtx, name and description are forwarded to the TestCase base,
// the image parameters and GLSL version are stored in the corresponding members.
// NOTE(review): the initialiser for m_format is not visible in this excerpt.
126 ImageSparseResidencyCase::ImageSparseResidencyCase (tcu::TestContext& testCtx,
127 const std::string& name,
128 const std::string& description,
129 const ImageType imageType,
130 const tcu::UVec3& imageSize,
131 const tcu::TextureFormat& format,
132 const glu::GLSLVersion glslVersion)
133 : TestCase (testCtx, name, description)
134 , m_imageType (imageType)
135 , m_imageSize (imageSize)
137 , m_glslVersion (glslVersion)
// Generates the GLSL compute shader for this case and registers it under the name "comp".
// The shader declares a write-only image at binding 0 and, for every invocation whose global ID
// lies inside the grid, stores (x % 127, y % 127, z % 127, 1) built from that global ID at the
// matching image coordinate.
141 void ImageSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const
143 // Create compute program
144 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
145 const std::string imageTypeStr = getShaderImageType(m_format, m_imageType);
146 const std::string formatQualifierStr = getShaderImageFormatQualifier(m_format);
147 const std::string formatDataStr = getShaderImageDataType(m_format);
// The local size baked into the shader is derived from the same grid size that the bounds
// checks below compare against.
148 const tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
149 const tcu::UVec3 workGroupSize = computeWorkGroupSize(gridSize);
151 std::ostringstream src;
152 src << versionDecl << "\n"
153 << "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y() << ", local_size_z = " << workGroupSize.z() << ") in; \n"
154 << "layout (binding = 0, " << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr << " u_image;\n"
155 << "void main (void)\n"
// Bounds checks: invocations outside the grid in any dimension do not reach the store.
157 << " if( gl_GlobalInvocationID.x < " << gridSize.x() << " ) \n"
158 << " if( gl_GlobalInvocationID.y < " << gridSize.y() << " ) \n"
159 << " if( gl_GlobalInvocationID.z < " << gridSize.z() << " ) \n"
// The stored value depends only on the invocation ID, so it can be recomputed on the host.
161 << " imageStore(u_image, " << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z") << ","
162 << formatDataStr << "( int(gl_GlobalInvocationID.x) % 127, int(gl_GlobalInvocationID.y) % 127, int(gl_GlobalInvocationID.z) % 127, 1));\n"
166 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
// Runtime instance of the sparse image residency test. It holds the image type, size and
// format to test and performs the whole test in iterate().
169 class ImageSparseResidencyInstance : public SparseResourcesBaseInstance
172 ImageSparseResidencyInstance(Context& context,
173 const ImageType imageType,
174 const tcu::UVec3& imageSize,
175 const tcu::TextureFormat& format);
// Executes the test and returns its status.
177 tcu::TestStatus iterate (void);
// Immutable test parameters captured at construction.
180 const ImageType m_imageType;
181 const tcu::UVec3 m_imageSize;
182 const tcu::TextureFormat m_format;
// Constructs the instance: context is forwarded to SparseResourcesBaseInstance and the image
// parameters are stored in the corresponding members.
// NOTE(review): the initialiser for m_format is not visible in this excerpt.
185 ImageSparseResidencyInstance::ImageSparseResidencyInstance (Context& context,
186 const ImageType imageType,
187 const tcu::UVec3& imageSize,
188 const tcu::TextureFormat& format)
189 : SparseResourcesBaseInstance (context)
190 , m_imageType (imageType)
191 , m_imageSize (imageSize)
// Runs the sparse residency test for one image configuration:
//  1. checks device support and throws NotSupportedError when the configuration cannot be tested,
//  2. creates a sparse-residency image and builds memory binds for its blocks and mip tail(s),
//  3. submits the binds on the sparse queue, signalling a semaphore,
//  4. records and submits a compute dispatch that writes the image, followed by a copy of the
//     image into a host-visible buffer,
//  5. compares the buffer contents against the values the shader is expected to have written.
// Returns a pass status when every comparison succeeds, a fail status otherwise.
196 tcu::TestStatus ImageSparseResidencyInstance::iterate (void)
198 const InstanceInterface& instance = m_context.getInstanceInterface();
199 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
200 const VkPhysicalDeviceProperties physicalDeviceProperties = getPhysicalDeviceProperties(instance, physicalDevice);
201 VkImageCreateInfo imageCreateInfo;
202 VkSparseImageMemoryRequirements aspectRequirements;
203 VkExtent3D imageGranularity;
// Holds the wrapped device memory handles created for the binds below.
204 std::vector<DeviceMemorySp> deviceMemUniquePtrVec;
206 // Check if image size does not exceed device limits
207 if (!isImageSizeSupported(instance, physicalDevice, m_imageType, m_imageSize))
208 TCU_THROW(NotSupportedError, "Image size not supported for device");
210 // Check if device supports sparse operations for image type
211 if (!checkSparseSupportForImageType(instance, physicalDevice, m_imageType))
212 TCU_THROW(NotSupportedError, "Sparse residency for image type is not supported");
// Describe a single-mip, single-sample, optimally tiled sparse image that is written as a
// storage image and read back as a transfer source.
214 imageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
215 imageCreateInfo.pNext = DE_NULL;
216 imageCreateInfo.flags = VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
217 imageCreateInfo.imageType = mapImageType(m_imageType);
218 imageCreateInfo.format = mapTextureFormat(m_format);
219 imageCreateInfo.extent = makeExtent3D(getLayerSize(m_imageType, m_imageSize));
220 imageCreateInfo.mipLevels = 1u;
221 imageCreateInfo.arrayLayers = getNumLayers(m_imageType, m_imageSize);
222 imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
223 imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
224 imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
225 imageCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
226 VK_IMAGE_USAGE_STORAGE_BIT;
227 imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
228 imageCreateInfo.queueFamilyIndexCount = 0u;
229 imageCreateInfo.pQueueFamilyIndices = DE_NULL;
// Cube and cube-array images additionally need the cube-compatible creation flag.
231 if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
233 imageCreateInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
236 // Check if device supports sparse operations for image format
237 if (!checkSparseSupportForImageFormat(instance, physicalDevice, imageCreateInfo))
238 TCU_THROW(NotSupportedError, "The image format does not support sparse operations");
241 // Create logical device supporting both sparse and compute queues
242 QueueRequirementsVec queueRequirements;
243 queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
244 queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
246 createDeviceSupportingQueues(queueRequirements);
249 const DeviceInterface& deviceInterface = getDeviceInterface();
250 const Queue& sparseQueue = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
251 const Queue& computeQueue = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
253 // Create sparse image
254 const Unique<VkImage> sparseImage(createImage(deviceInterface, getDevice(), &imageCreateInfo));
256 // Create sparse image memory bind semaphore
257 const Unique<VkSemaphore> imageMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));
260 // Get image general memory requirements
261 const VkMemoryRequirements imageMemoryRequirements = getImageMemoryRequirements(deviceInterface, getDevice(), *sparseImage);
263 if (imageMemoryRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
264 TCU_THROW(NotSupportedError, "Required memory size for sparse resource exceeds device limits");
266 DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);
268 // Get sparse image sparse memory requirements
269 const std::vector<VkSparseImageMemoryRequirements> sparseMemoryRequirements = getImageSparseMemoryRequirements(deviceInterface, getDevice(), *sparseImage);
271 DE_ASSERT(sparseMemoryRequirements.size() != 0);
// Locate the colour aspect (required) and the metadata aspect (optional) in the reported
// requirements.
273 const deUint32 colorAspectIndex = getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_COLOR_BIT);
274 const deUint32 metadataAspectIndex = getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_METADATA_BIT);
276 if (colorAspectIndex == NO_MATCH_FOUND)
277 TCU_THROW(NotSupportedError, "Not supported image aspect - the test supports currently only VK_IMAGE_ASPECT_COLOR_BIT");
279 aspectRequirements = sparseMemoryRequirements[colorAspectIndex];
280 imageGranularity = aspectRequirements.formatProperties.imageGranularity;
282 const VkImageAspectFlags aspectMask = aspectRequirements.formatProperties.aspectMask;
284 DE_ASSERT((aspectRequirements.imageMipTailSize % imageMemoryRequirements.alignment) == 0);
// Block binds go through pImageBinds, mip tail and metadata binds through pImageOpaqueBinds
// (see the bind info set-up further down).
286 std::vector<VkSparseImageMemoryBind> imageResidencyMemoryBinds;
287 std::vector<VkSparseMemoryBind> imageMipTailMemoryBinds;
289 const deUint32 memoryType = findMatchingMemoryType(instance, physicalDevice, imageMemoryRequirements, MemoryRequirement::Any);
// A missing memory type is reported as a test failure here, not as NotSupportedError.
291 if (memoryType == NO_MATCH_FOUND)
292 return tcu::TestStatus::fail("No matching memory type found");
294 // Bind device memory for each aspect
295 for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
// Walk the mip levels that precede the mip tail.
// NOTE(review): the bound is imageMipTailFirstLod and is not clamped to
// imageCreateInfo.mipLevels (1 for this test) - confirm this is intended.
297 for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
299 const VkImageSubresource subresource = { aspectMask, mipLevelNdx, layerNdx };
300 const VkExtent3D mipExtent = mipLevelExtents(imageCreateInfo.extent, mipLevelNdx);
// Number of granularity-sized blocks per axis, rounded up.
301 const tcu::UVec3 numSparseBinds = alignedDivide(mipExtent, imageGranularity);
// Size of the last block on each axis: the remainder when the extent is not a multiple of the
// granularity, a full block otherwise.
302 const tcu::UVec3 lastBlockExtent = tcu::UVec3(mipExtent.width % imageGranularity.width ? mipExtent.width % imageGranularity.width : imageGranularity.width,
303 mipExtent.height % imageGranularity.height ? mipExtent.height % imageGranularity.height : imageGranularity.height,
304 mipExtent.depth % imageGranularity.depth ? mipExtent.depth % imageGranularity.depth : imageGranularity.depth);
305 for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
306 for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
307 for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
// Linear index of the block over x, y, z and the array layer.
309 const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();
// NOTE(review): the body of this condition is not visible in this excerpt; presumably
// odd-indexed blocks are skipped and left unbound, which would match the verification below
// treating even-indexed blocks as the resident ones - confirm.
311 if (linearIndex % 2u == 1u)
// Texel offset of the block inside the mip level.
317 offset.x = x*imageGranularity.width;
318 offset.y = y*imageGranularity.height;
319 offset.z = z*imageGranularity.depth;
// Blocks on the far edge of an axis use the (possibly smaller) last-block extent.
322 extent.width = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
323 extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
324 extent.depth = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;
326 const VkSparseImageMemoryBind imageMemoryBind = makeSparseImageMemoryBind(deviceInterface, getDevice(),
327 imageMemoryRequirements.alignment, memoryType, subresource, offset, extent);
// Wrap the memory handle carried by the bind together with a deleter and keep it in the vector.
329 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
331 imageResidencyMemoryBinds.push_back(imageMemoryBind);
// Per-layer mip tail: used when the format does not report a single mip tail. As mipLevels is 1,
// the condition holds only when imageMipTailFirstLod is 0.
335 if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
337 const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
338 aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);
340 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
342 imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
// Per-layer metadata mip tail, bound with the metadata flag when a metadata aspect exists.
346 if (metadataAspectIndex != NO_MATCH_FOUND)
348 const VkSparseImageMemoryRequirements metadataAspectRequirements = sparseMemoryRequirements[metadataAspectIndex];
350 if (!(metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
352 const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
353 metadataAspectRequirements.imageMipTailSize, memoryType,
354 metadataAspectRequirements.imageMipTailOffset + layerNdx * metadataAspectRequirements.imageMipTailStride,
355 VK_SPARSE_MEMORY_BIND_METADATA_BIT);
357 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
359 imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
// Single mip tail: one bind at imageMipTailOffset, with no per-layer stride applied.
364 if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
366 const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
367 aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);
369 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
371 imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
// Single metadata mip tail, bound with the metadata flag.
375 if (metadataAspectIndex != NO_MATCH_FOUND)
377 const VkSparseImageMemoryRequirements metadataAspectRequirements = sparseMemoryRequirements[metadataAspectIndex];
379 if ((metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
381 const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
382 metadataAspectRequirements.imageMipTailSize, memoryType, metadataAspectRequirements.imageMipTailOffset,
383 VK_SPARSE_MEMORY_BIND_METADATA_BIT);
385 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
387 imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
// Bind submission description. It starts with no binds; the image and opaque bind entries are
// filled in below only when the matching bind list is non-empty. The bind semaphore is signalled
// on completion.
391 VkBindSparseInfo bindSparseInfo =
393 VK_STRUCTURE_TYPE_BIND_SPARSE_INFO, //VkStructureType sType;
394 DE_NULL, //const void* pNext;
395 0u, //deUint32 waitSemaphoreCount;
396 DE_NULL, //const VkSemaphore* pWaitSemaphores;
397 0u, //deUint32 bufferBindCount;
398 DE_NULL, //const VkSparseBufferMemoryBindInfo* pBufferBinds;
399 0u, //deUint32 imageOpaqueBindCount;
400 DE_NULL, //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
401 0u, //deUint32 imageBindCount;
402 DE_NULL, //const VkSparseImageMemoryBindInfo* pImageBinds;
403 1u, //deUint32 signalSemaphoreCount;
404 &imageMemoryBindSemaphore.get() //const VkSemaphore* pSignalSemaphores;
// Declared outside the conditionals because bindSparseInfo stores pointers to them.
407 VkSparseImageMemoryBindInfo imageResidencyBindInfo;
408 VkSparseImageOpaqueMemoryBindInfo imageMipTailBindInfo;
410 if (imageResidencyMemoryBinds.size() > 0)
412 imageResidencyBindInfo.image = *sparseImage;
413 imageResidencyBindInfo.bindCount = static_cast<deUint32>(imageResidencyMemoryBinds.size());
414 imageResidencyBindInfo.pBinds = &imageResidencyMemoryBinds[0];
416 bindSparseInfo.imageBindCount = 1u;
417 bindSparseInfo.pImageBinds = &imageResidencyBindInfo;
420 if (imageMipTailMemoryBinds.size() > 0)
422 imageMipTailBindInfo.image = *sparseImage;
423 imageMipTailBindInfo.bindCount = static_cast<deUint32>(imageMipTailMemoryBinds.size());
424 imageMipTailBindInfo.pBinds = &imageMipTailMemoryBinds[0];
426 bindSparseInfo.imageOpaqueBindCount = 1u;
427 bindSparseInfo.pImageOpaqueBinds = &imageMipTailBindInfo;
430 // Submit sparse bind commands for execution
431 VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
434 // Create command buffer for compute and transfer operations
435 const Unique<VkCommandPool> commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
436 const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
438 // Start recording commands
439 beginCommandBuffer(deviceInterface, *commandBuffer);
441 // Create descriptor set layout
442 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
443 DescriptorSetLayoutBuilder()
444 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
445 .build(deviceInterface, getDevice()));
447 // Create and bind compute pipeline
448 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
449 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
450 const Unique<VkPipeline> computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
452 deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
454 // Create and bind descriptor set
455 const Unique<VkDescriptorPool> descriptorPool(
456 DescriptorPoolBuilder()
457 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u)
458 .build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
460 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
// The view covers mip level 0 and all array layers of the colour aspect.
462 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
463 const Unique<VkImageView> imageView(makeImageView(deviceInterface, getDevice(), *sparseImage, mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange));
464 const VkDescriptorImageInfo sparseImageInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
466 DescriptorSetUpdateBuilder()
467 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &sparseImageInfo)
468 .update(deviceInterface, getDevice());
470 deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
// Transition the image from UNDEFINED to GENERAL for shader writes. The queue family indices
// are only set when the sparse and compute queues belong to different families.
473 const VkImageMemoryBarrier sparseImageLayoutChangeBarrier = makeImageMemoryBarrier
476 VK_ACCESS_SHADER_WRITE_BIT,
477 VK_IMAGE_LAYOUT_UNDEFINED,
478 VK_IMAGE_LAYOUT_GENERAL,
479 sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? sparseQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
480 sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? computeQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
485 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageLayoutChangeBarrier);
// Recompute grid and work-group size with the same helpers initPrograms uses, so the dispatch
// matches the local size compiled into the shader.
488 const tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
491 const tcu::UVec3 workGroupSize = computeWorkGroupSize(gridSize);
// Work-group counts per axis, rounded up so the whole grid is covered.
493 const deUint32 xWorkGroupCount = gridSize.x() / workGroupSize.x() + (gridSize.x() % workGroupSize.x() ? 1u : 0u);
494 const deUint32 yWorkGroupCount = gridSize.y() / workGroupSize.y() + (gridSize.y() % workGroupSize.y() ? 1u : 0u);
495 const deUint32 zWorkGroupCount = gridSize.z() / workGroupSize.z() + (gridSize.z() % workGroupSize.z() ? 1u : 0u);
// The dispatch size limit is a literal constant here rather than a value read from the device.
497 const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);
499 if (maxComputeWorkGroupCount.x() < xWorkGroupCount ||
500 maxComputeWorkGroupCount.y() < yWorkGroupCount ||
501 maxComputeWorkGroupCount.z() < zWorkGroupCount)
503 TCU_THROW(NotSupportedError, "Image size is not supported");
506 deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
// Make the shader writes available to the transfer read and move the image to the
// transfer-source layout.
510 const VkImageMemoryBarrier sparseImageTrasferBarrier = makeImageMemoryBarrier
512 VK_ACCESS_SHADER_WRITE_BIT,
513 VK_ACCESS_TRANSFER_READ_BIT,
514 VK_IMAGE_LAYOUT_GENERAL,
515 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
520 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageTrasferBarrier);
// Host-visible buffer sized to hold every pixel of the image.
523 const deUint32 imageSizeInBytes = getNumPixels(m_imageType, m_imageSize) * tcu::getPixelSize(m_format);
524 const VkBufferCreateInfo outputBufferCreateInfo = makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
525 const Unique<VkBuffer> outputBuffer (createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
526 const de::UniquePtr<Allocation> outputBufferAlloc (bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
529 const VkBufferImageCopy bufferImageCopy = makeBufferImageCopy(imageCreateInfo.extent, imageCreateInfo.arrayLayers);
531 deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *sparseImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &bufferImageCopy);
// Make the transfer write to the buffer visible to host reads.
535 const VkBufferMemoryBarrier outputBufferHostReadBarrier = makeBufferMemoryBarrier
537 VK_ACCESS_TRANSFER_WRITE_BIT,
538 VK_ACCESS_HOST_READ_BIT,
544 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferHostReadBarrier, 0u, DE_NULL);
547 // End recording commands
548 endCommandBuffer(deviceInterface, *commandBuffer);
550 // The stage at which execution is going to wait for finish of sparse binding operations
551 const VkPipelineStageFlags stageBits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
553 // Submit commands for execution and wait for completion
554 submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &imageMemoryBindSemaphore.get(), stageBits);
556 // Retrieve data from buffer to host memory
557 invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), imageSizeInBytes);
// View the buffer as a gridSize-shaped pixel volume in the tested format.
559 const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
560 const tcu::ConstPixelBufferAccess pixelBuffer = tcu::ConstPixelBufferAccess(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputData);
562 // Wait for sparse queue to become idle
// NOTE(review): the result of queueWaitIdle is not checked here.
563 deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
// Verification. When mip level 0 is outside the mip tail the image was bound block by block, so
// each block is checked according to its index parity.
566 if( aspectRequirements.imageMipTailFirstLod > 0u )
568 const VkExtent3D mipExtent = mipLevelExtents(imageCreateInfo.extent, 0u);
569 const tcu::UVec3 numSparseBinds = alignedDivide(mipExtent, imageGranularity);
570 const tcu::UVec3 lastBlockExtent = tcu::UVec3( mipExtent.width % imageGranularity.width ? mipExtent.width % imageGranularity.width : imageGranularity.width,
571 mipExtent.height % imageGranularity.height ? mipExtent.height % imageGranularity.height : imageGranularity.height,
572 mipExtent.depth % imageGranularity.depth ? mipExtent.depth % imageGranularity.depth : imageGranularity.depth);
574 for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
576 for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
577 for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
578 for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
// Position of the block in pixelBuffer coordinates; the layer index is folded into the z
// coordinate.
581 offset.width = x*imageGranularity.width;
582 offset.height = y*imageGranularity.height;
583 offset.depth = z*imageGranularity.depth + layerNdx*numSparseBinds.z()*imageGranularity.depth;
586 extent.width = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
587 extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
588 extent.depth = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;
// Same linear block index as in the binding loop above.
590 const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();
// Even-indexed blocks must contain the values written by the shader.
592 if (linearIndex % 2u == 0u)
594 for (deUint32 offsetZ = offset.depth; offsetZ < offset.depth + extent.depth; ++offsetZ)
595 for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
596 for (deUint32 offsetX = offset.width; offsetX < offset.width + extent.width; ++offsetX)
// Host-side recomputation of the value the shader stores at this coordinate.
598 const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
599 const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
// Only the channels used by the format's channel order are compared.
601 if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
602 return tcu::TestStatus::fail("Failed");
// The remaining blocks are checked only when the device reports residencyNonResidentStrict;
// they must then read back as zero.
605 else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
607 for (deUint32 offsetZ = offset.depth; offsetZ < offset.depth + extent.depth; ++offsetZ)
608 for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
609 for (deUint32 offsetX = offset.width; offsetX < offset.width + extent.width; ++offsetX)
611 const tcu::UVec4 referenceValue = tcu::UVec4(0u, 0u, 0u, 0u);
612 const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
614 if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
615 return tcu::TestStatus::fail("Failed");
// NOTE(review): presumably the else-branch of the imageMipTailFirstLod check (the `else` line is
// not visible in this excerpt). Every texel of every layer is expected to hold the shader value.
623 const VkExtent3D mipExtent = mipLevelExtents(imageCreateInfo.extent, 0u);
625 for (deUint32 offsetZ = 0u; offsetZ < mipExtent.depth * imageCreateInfo.arrayLayers; ++offsetZ)
626 for (deUint32 offsetY = 0u; offsetY < mipExtent.height; ++offsetY)
627 for (deUint32 offsetX = 0u; offsetX < mipExtent.width; ++offsetX)
629 const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
630 const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
632 if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
633 return tcu::TestStatus::fail("Failed");
637 return tcu::TestStatus::pass("Passed");
// Creates the runtime instance for this case, passing on the image type, size and format.
// The instance is allocated with new and returned as a raw pointer.
640 TestInstance* ImageSparseResidencyCase::createInstance (Context& context) const
642 return new ImageSparseResidencyInstance(context, m_imageType, m_imageSize, m_format);
// Builds the "image_sparse_residency" test group.
// The hierarchy is: image type -> format qualifier -> one case per image size, where each case
// is named "<x>_<y>_<z>" after the size and uses GLSL 4.40. Ownership of the returned group is
// released to the caller.
647 tcu::TestCaseGroup* createImageSparseResidencyTests (tcu::TestContext& testCtx)
// NOTE(review): the description string says "Buffer Sparse Residency" although the group is
// named image_sparse_residency - looks like a copy-paste leftover, confirm.
649 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_residency", "Buffer Sparse Residency"));
651 static const deUint32 sizeCountPerImageType = 3u;
// One entry per tested image type together with the sizes to test for it.
653 struct ImageParameters
656 tcu::UVec3 imageSizes[sizeCountPerImageType];
// Each type gets two regular sizes and one with odd dimensions (11 / 137).
659 static const ImageParameters imageParametersArray[] =
661 { IMAGE_TYPE_2D, { tcu::UVec3(512u, 256u, 1u), tcu::UVec3(1024u, 128u, 1u), tcu::UVec3(11u, 137u, 1u) } },
662 { IMAGE_TYPE_2D_ARRAY, { tcu::UVec3(512u, 256u, 6u), tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u, 137u, 3u) } },
663 { IMAGE_TYPE_CUBE, { tcu::UVec3(256u, 256u, 1u), tcu::UVec3(128u, 128u, 1u), tcu::UVec3(137u, 137u, 1u) } },
664 { IMAGE_TYPE_CUBE_ARRAY, { tcu::UVec3(256u, 256u, 6u), tcu::UVec3(128u, 128u, 8u), tcu::UVec3(137u, 137u, 3u) } },
665 { IMAGE_TYPE_3D, { tcu::UVec3(512u, 256u, 16u), tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u, 137u, 3u) } }
// Integer formats only: signed R and RG, unsigned RGBA, each at 32, 16 and 8 bits.
668 static const tcu::TextureFormat formats[] =
670 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
671 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT16),
672 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT8),
673 tcu::TextureFormat(tcu::TextureFormat::RG, tcu::TextureFormat::SIGNED_INT32),
674 tcu::TextureFormat(tcu::TextureFormat::RG, tcu::TextureFormat::SIGNED_INT16),
675 tcu::TextureFormat(tcu::TextureFormat::RG, tcu::TextureFormat::SIGNED_INT8),
676 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT32),
677 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT16),
678 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT8)
// One sub-group per image type.
681 for (deInt32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray); ++imageTypeNdx)
683 const ImageType imageType = imageParametersArray[imageTypeNdx].imageType;
684 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
// One sub-group per format, named after its shader format qualifier.
686 for (deInt32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); ++formatNdx)
688 const tcu::TextureFormat& format = formats[formatNdx];
689 de::MovePtr<tcu::TestCaseGroup> formatGroup(new tcu::TestCaseGroup(testCtx, getShaderImageFormatQualifier(format).c_str(), ""));
// One test case per image size.
691 for (deInt32 imageSizeNdx = 0; imageSizeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray[imageTypeNdx].imageSizes); ++imageSizeNdx)
693 const tcu::UVec3 imageSize = imageParametersArray[imageTypeNdx].imageSizes[imageSizeNdx];
695 std::ostringstream stream;
696 stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();
698 formatGroup->addChild(new ImageSparseResidencyCase(testCtx, stream.str(), "", imageType, imageSize, format, glu::GLSL_VERSION_440));
// Ownership of each finished sub-group passes to its parent.
700 imageTypeGroup->addChild(formatGroup.release());
702 testGroup->addChild(imageTypeGroup.release());
705 return testGroup.release();