1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2016 The Khronos Group Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 * \file vktSparseResourcesImageMemoryAliasing.cpp
21 * \brief Sparse image memory aliasing tests
22 *//*--------------------------------------------------------------------*/
24 #include "vktSparseResourcesImageMemoryAliasing.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vkMemUtil.hpp"
36 #include "vkQueryUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkTypeUtil.hpp"
40 #include "deStringUtil.hpp"
41 #include "deUniquePtr.hpp"
42 #include "deSharedPtr.hpp"
43 #include "tcuTexture.hpp"
// Returns the GLSL integer-vector expression used to address a texel of an image of
// the given type, assembled from the component strings x, y and z.
// In the lines visible here, the branch ending after IMAGE_TYPE_1D_ARRAY yields an
// "ivec2(x,y)" expression and the branch ending after IMAGE_TYPE_CUBE_ARRAY yields an
// "ivec3(x,y,z)" expression.
63 const std::string getCoordStr (const ImageType imageType,
71 case IMAGE_TYPE_BUFFER:
74 case IMAGE_TYPE_1D_ARRAY:
// Two-component coordinate.
76 return "ivec2(" + x + "," + y + ")";
78 case IMAGE_TYPE_2D_ARRAY:
81 case IMAGE_TYPE_CUBE_ARRAY:
// Three-component coordinate.
82 return "ivec3(" + x + "," + y + "," + z + ")";
// Divides each component of extent by the matching component of divisor, rounding up:
// a non-zero remainder adds one to the quotient. Used by iterate() to count how many
// sparse blocks of a given granularity are needed to cover a mip level.
// NOTE(review): no guard against a zero divisor component is visible here - confirm that
// callers never pass one.
90 tcu::UVec3 alignedDivide (const VkExtent3D& extent, const VkExtent3D& divisor)
94 result.x() = extent.width / divisor.width + ((extent.width % divisor.width) ? 1u : 0u);
95 result.y() = extent.height / divisor.height + ((extent.height % divisor.height) ? 1u : 0u);
96 result.z() = extent.depth / divisor.depth + ((extent.depth % divisor.depth) ? 1u : 0u);
// Test case describing one sparse image memory aliasing configuration: an image type,
// an image size, a texture format and the GLSL version used for the generated shaders.
// initPrograms() emits the compute shaders and createInstance() creates the
// ImageSparseMemoryAliasingInstance that runs the test.
101 class ImageSparseMemoryAliasingCase : public TestCase
104 ImageSparseMemoryAliasingCase (tcu::TestContext& testCtx,
105 const std::string& name,
106 const std::string& description,
107 const ImageType imageType,
108 const tcu::UVec3& imageSize,
109 const tcu::TextureFormat& format,
110 const glu::GLSLVersion glslVersion);
// Adds one compute shader source per mip level to sourceCollections.
112 void initPrograms (SourceCollections& sourceCollections) const;
// Returns a newly allocated test instance configured with this case's parameters.
113 TestInstance* createInstance (Context& context) const;
// Immutable test parameters captured at construction time.
117 const ImageType m_imageType;
118 const tcu::UVec3 m_imageSize;
119 const tcu::TextureFormat m_format;
120 const glu::GLSLVersion m_glslVersion;
// Constructs the test case: forwards testCtx, name and description to TestCase and stores
// the image parameters and GLSL version in the corresponding members.
123 ImageSparseMemoryAliasingCase::ImageSparseMemoryAliasingCase (tcu::TestContext& testCtx,
124 const std::string& name,
125 const std::string& description,
126 const ImageType imageType,
127 const tcu::UVec3& imageSize,
128 const tcu::TextureFormat& format,
129 const glu::GLSLVersion glslVersion)
130 : TestCase (testCtx, name, description)
131 , m_imageType (imageType)
132 , m_imageSize (imageSize)
134 , m_glslVersion (glslVersion)
// Test instance that executes the sparse image memory aliasing test for one image type,
// size and format. All of the work is done in iterate().
138 class ImageSparseMemoryAliasingInstance : public SparseResourcesBaseInstance
141 ImageSparseMemoryAliasingInstance (Context& context,
142 const ImageType imageType,
143 const tcu::UVec3& imageSize,
144 const tcu::TextureFormat& format);
// Runs the test and returns its status.
146 tcu::TestStatus iterate (void);
// Immutable test parameters captured at construction time.
149 const ImageType m_imageType;
150 const tcu::UVec3 m_imageSize;
151 const tcu::TextureFormat m_format;
// Constructs the instance: forwards context to SparseResourcesBaseInstance and stores the
// image parameters in the corresponding members.
154 ImageSparseMemoryAliasingInstance::ImageSparseMemoryAliasingInstance (Context& context,
155 const ImageType imageType,
156 const tcu::UVec3& imageSize,
157 const tcu::TextureFormat& format)
158 : SparseResourcesBaseInstance (context)
159 , m_imageType (imageType)
160 , m_imageSize (imageSize)
// Runs the aliasing test.
//
// Two sparse images, imageRead and imageWrite, are created from the same create info and
// bound with a single queueBindSparse submission. For the mip levels below the mip tail both
// images use the same array of VkSparseImageMemoryBind entries, so they are backed by the
// same device memory; each image receives its own mip tail allocations.
// imageRead is then filled from a host-visible buffer, a compute shader per mip level writes
// a pattern through views of imageWrite, and imageRead is copied back to a host-visible
// buffer and checked:
//   - levels below the mip tail must contain the pattern written through imageWrite;
//   - mip tail levels must still contain the data uploaded into imageRead.
//
// Throws NotSupportedError when a required feature, limit or format capability is missing.
// Returns a failing status when no memory type matches or when the read-back data differs
// from the expectation, and a passing status otherwise.
165 tcu::TestStatus ImageSparseMemoryAliasingInstance::iterate (void)
167 const InstanceInterface& instance = m_context.getInstanceInterface();
168 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
169 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
// Fixed work group limits used by this test; initPrograms() uses the same size and
// invocation values when it generates the shaders.
170 const tcu::UVec3 maxWorkGroupSize = tcu::UVec3(128u, 128u, 64u);
171 const tcu::UVec3 maxWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);
172 const deUint32 maxWorkGroupInvocations = 128u;
173 VkImageCreateInfo imageSparseInfo;
174 VkSparseImageMemoryRequirements aspectRequirements;
// Holds every device memory object created for the sparse binds below.
175 std::vector<DeviceMemoryUniquePtr> deviceMemUniquePtrVec;
177 // Check if image size does not exceed device limits
178 if (!isImageSizeSupported(instance, physicalDevice, m_imageType, m_imageSize))
179 TCU_THROW(NotSupportedError, "Image size not supported for device");
181 // Check if sparse memory aliasing is supported
182 if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyAliased)
183 TCU_THROW(NotSupportedError, "Sparse memory aliasing not supported");
185 // Check if device supports sparse operations for image type
186 if (!checkSparseSupportForImageType(instance, physicalDevice, m_imageType))
187 TCU_THROW(NotSupportedError, "Sparse residency for image type is not supported");
// Describe the sparse image: residency, aliasing and binding are all requested, and the
// image is usable as transfer source, transfer destination and storage image.
189 imageSparseInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
190 imageSparseInfo.pNext = DE_NULL;
191 imageSparseInfo.flags = VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
192 VK_IMAGE_CREATE_SPARSE_ALIASED_BIT |
193 VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
194 imageSparseInfo.imageType = mapImageType(m_imageType);
195 imageSparseInfo.format = mapTextureFormat(m_format);
196 imageSparseInfo.extent = makeExtent3D(getLayerSize(m_imageType, m_imageSize));
197 imageSparseInfo.arrayLayers = getNumLayers(m_imageType, m_imageSize);
198 imageSparseInfo.samples = VK_SAMPLE_COUNT_1_BIT;
199 imageSparseInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
200 imageSparseInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
201 imageSparseInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT |
202 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
203 VK_IMAGE_USAGE_STORAGE_BIT;
204 imageSparseInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
205 imageSparseInfo.queueFamilyIndexCount = 0u;
206 imageSparseInfo.pQueueFamilyIndices = DE_NULL;
// Cube and cube array images additionally need the cube-compatible flag.
208 if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
209 imageSparseInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
212 // Assign maximum allowed mipmap levels to image
213 VkImageFormatProperties imageFormatProperties;
214 instance.getPhysicalDeviceImageFormatProperties(physicalDevice,
215 imageSparseInfo.format,
216 imageSparseInfo.imageType,
217 imageSparseInfo.tiling,
218 imageSparseInfo.usage,
219 imageSparseInfo.flags,
220 &imageFormatProperties);
222 imageSparseInfo.mipLevels = getImageMaxMipLevels(imageFormatProperties, imageSparseInfo.extent);
225 // Check if device supports sparse operations for image format
226 if (!checkSparseSupportForImageFormat(instance, physicalDevice, imageSparseInfo))
227 TCU_THROW(NotSupportedError, "The image format does not support sparse operations");
230 // Create logical device supporting both sparse and compute queues
231 QueueRequirementsVec queueRequirements;
232 queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
233 queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
235 createDeviceSupportingQueues(queueRequirements);
238 const Queue& sparseQueue = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
239 const Queue& computeQueue = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
// Allocator used for the host-visible input and output buffers.
241 const de::UniquePtr<Allocator> allocator(new SimpleAllocator(deviceInterface, *m_logicalDevice, getPhysicalDeviceMemoryProperties(instance, physicalDevice)));
243 // Create sparse image
// Both images are created from the same create info: imageRead is the transfer target and
// source, imageWrite is the image the compute shaders store into.
244 const Unique<VkImage> imageRead(createImage(deviceInterface, *m_logicalDevice, &imageSparseInfo));
245 const Unique<VkImage> imageWrite(createImage(deviceInterface, *m_logicalDevice, &imageSparseInfo));
247 // Create semaphores to synchronize sparse binding operations with other operations on the sparse images
248 const Unique<VkSemaphore> memoryBindSemaphoreTransfer(makeSemaphore(deviceInterface, *m_logicalDevice));
249 const Unique<VkSemaphore> memoryBindSemaphoreCompute(makeSemaphore(deviceInterface, *m_logicalDevice));
251 const VkSemaphore imageMemoryBindSemaphores[] = { memoryBindSemaphoreTransfer.get(), memoryBindSemaphoreCompute.get() };
// One residency bind list shared by both images, and one mip tail bind list per image.
254 std::vector<VkSparseImageMemoryBind> imageResidencyMemoryBinds;
255 std::vector<VkSparseMemoryBind> imageReadMipTailBinds;
256 std::vector<VkSparseMemoryBind> imageWriteMipTailBinds;
258 // Get sparse image general memory requirements
259 const VkMemoryRequirements imageMemoryRequirements = getImageMemoryRequirements(deviceInterface, *m_logicalDevice, *imageRead);
261 // Check if required image memory size does not exceed device limits
262 if (imageMemoryRequirements.size > getPhysicalDeviceProperties(instance, physicalDevice).limits.sparseAddressSpaceSize)
263 TCU_THROW(NotSupportedError, "Required memory size for sparse resource exceeds device limits");
265 DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);
267 // Get sparse image sparse memory requirements
268 const std::vector<VkSparseImageMemoryRequirements> sparseMemoryRequirements = getImageSparseMemoryRequirements(deviceInterface, *m_logicalDevice, *imageRead);
270 DE_ASSERT(sparseMemoryRequirements.size() != 0);
// Only the colour aspect is handled; its requirements drive all binds below.
272 const deUint32 colorAspectIndex = getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_COLOR_BIT);
274 if (colorAspectIndex == NO_MATCH_FOUND)
275 TCU_THROW(NotSupportedError, "Not supported image aspect - the test supports currently only VK_IMAGE_ASPECT_COLOR_BIT");
277 aspectRequirements = sparseMemoryRequirements[colorAspectIndex];
279 const VkImageAspectFlags aspectMask = aspectRequirements.formatProperties.aspectMask;
280 const VkExtent3D imageGranularity = aspectRequirements.formatProperties.imageGranularity;
282 DE_ASSERT((aspectRequirements.imageMipTailSize % imageMemoryRequirements.alignment) == 0);
284 const deUint32 memoryType = findMatchingMemoryType(instance, physicalDevice, imageMemoryRequirements, MemoryRequirement::Any);
286 if (memoryType == NO_MATCH_FOUND)
287 return tcu::TestStatus::fail("No matching memory type found");
289 // Bind memory for each layer
290 for (deUint32 layerNdx = 0; layerNdx < imageSparseInfo.arrayLayers; ++layerNdx)
// Levels below imageMipTailFirstLod get one bind covering the whole mip level; the
// allocation size is the memory alignment multiplied by the number of sparse blocks.
292 for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
294 const VkExtent3D mipExtent = mipLevelExtents(imageSparseInfo.extent, mipLevelNdx);
295 const tcu::UVec3 sparseBlocks = alignedDivide(mipExtent, imageGranularity);
296 const deUint32 numSparseBlocks = sparseBlocks.x() * sparseBlocks.y() * sparseBlocks.z();
297 const VkImageSubresource subresource = { aspectMask, mipLevelNdx, layerNdx };
299 const VkSparseImageMemoryBind imageMemoryBind = makeSparseImageMemoryBind(deviceInterface, *m_logicalDevice,
300 imageMemoryRequirements.alignment * numSparseBlocks, memoryType, subresource, makeOffset3D(0u, 0u, 0u), mipExtent);
// Wrap the memory handle from the bind with a deleter and keep it in deviceMemUniquePtrVec.
302 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
304 imageResidencyMemoryBinds.push_back(imageMemoryBind);
// Per-layer mip tail: without the single-miptail flag each layer gets its own tail bind,
// offset by layerNdx * imageMipTailStride. The two images get separate tail memory.
307 if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageSparseInfo.mipLevels)
309 const VkSparseMemoryBind imageReadMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, *m_logicalDevice,
310 aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);
312 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageReadMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
314 imageReadMipTailBinds.push_back(imageReadMipTailMemoryBind);
316 const VkSparseMemoryBind imageWriteMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, *m_logicalDevice,
317 aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);
319 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageWriteMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
321 imageWriteMipTailBinds.push_back(imageWriteMipTailMemoryBind);
// Single mip tail: with the single-miptail flag one tail bind at imageMipTailOffset is
// created per image. The two images again get separate tail memory.
325 if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageSparseInfo.mipLevels)
327 const VkSparseMemoryBind imageReadMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, *m_logicalDevice,
328 aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);
330 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageReadMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
332 imageReadMipTailBinds.push_back(imageReadMipTailMemoryBind);
334 const VkSparseMemoryBind imageWriteMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, *m_logicalDevice,
335 aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);
337 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageWriteMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
339 imageWriteMipTailBinds.push_back(imageWriteMipTailMemoryBind);
// The bind submission signals both semaphores; the compute queue submission at the end of
// this function waits on them. Image and opaque bind entries are filled in below.
342 VkBindSparseInfo bindSparseInfo =
344 VK_STRUCTURE_TYPE_BIND_SPARSE_INFO, //VkStructureType sType;
345 DE_NULL, //const void* pNext;
346 0u, //deUint32 waitSemaphoreCount;
347 DE_NULL, //const VkSemaphore* pWaitSemaphores;
348 0u, //deUint32 bufferBindCount;
349 DE_NULL, //const VkSparseBufferMemoryBindInfo* pBufferBinds;
350 0u, //deUint32 imageOpaqueBindCount;
351 DE_NULL, //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
352 0u, //deUint32 imageBindCount;
353 DE_NULL, //const VkSparseImageMemoryBindInfo* pImageBinds;
354 2u, //deUint32 signalSemaphoreCount;
355 imageMemoryBindSemaphores //const VkSemaphore* pSignalSemaphores;
358 VkSparseImageMemoryBindInfo imageResidencyBindInfo[2];
359 VkSparseImageOpaqueMemoryBindInfo imageMipTailBindInfo[2];
// Both images point at the same residency bind array, so their non-tail mip levels are
// backed by the same device memory. This is the aliasing under test.
361 if (imageResidencyMemoryBinds.size() > 0)
363 imageResidencyBindInfo[0].image = *imageRead;
364 imageResidencyBindInfo[0].bindCount = static_cast<deUint32>(imageResidencyMemoryBinds.size());
365 imageResidencyBindInfo[0].pBinds = &imageResidencyMemoryBinds[0];
367 imageResidencyBindInfo[1].image = *imageWrite;
368 imageResidencyBindInfo[1].bindCount = static_cast<deUint32>(imageResidencyMemoryBinds.size());
369 imageResidencyBindInfo[1].pBinds = &imageResidencyMemoryBinds[0];
371 bindSparseInfo.imageBindCount = 2u;
372 bindSparseInfo.pImageBinds = imageResidencyBindInfo;
// Mip tails are bound opaquely, each image using its own bind list.
// NOTE(review): only imageReadMipTailBinds is tested for emptiness; the two lists are
// filled in lockstep above, so imageWriteMipTailBinds should be non-empty as well.
375 if (imageReadMipTailBinds.size() > 0)
377 imageMipTailBindInfo[0].image = *imageRead;
378 imageMipTailBindInfo[0].bindCount = static_cast<deUint32>(imageReadMipTailBinds.size());
379 imageMipTailBindInfo[0].pBinds = &imageReadMipTailBinds[0];
381 imageMipTailBindInfo[1].image = *imageWrite;
382 imageMipTailBindInfo[1].bindCount = static_cast<deUint32>(imageWriteMipTailBinds.size());
383 imageMipTailBindInfo[1].pBinds = &imageWriteMipTailBinds[0];
385 bindSparseInfo.imageOpaqueBindCount = 2u;
386 bindSparseInfo.pImageOpaqueBinds = imageMipTailBindInfo;
389 // Submit sparse bind commands for execution
390 VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
393 // Create command buffer for compute and transfer operations
394 const Unique<VkCommandPool> commandPool (makeCommandPool(deviceInterface, *m_logicalDevice, computeQueue.queueFamilyIndex));
395 const Unique<VkCommandBuffer> commandBuffer(makeCommandBuffer(deviceInterface, *m_logicalDevice, *commandPool));
// One buffer<->image copy region per mip level; the buffer offsets are accumulated from the
// aligned size of each preceding level.
397 std::vector<VkBufferImageCopy> bufferImageCopy(imageSparseInfo.mipLevels);
400 deUint32 bufferOffset = 0u;
401 for (deUint32 mipLevelNdx = 0u; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
403 bufferImageCopy[mipLevelNdx] = makeBufferImageCopy(mipLevelExtents(imageSparseInfo.extent, mipLevelNdx), imageSparseInfo.arrayLayers, mipLevelNdx, bufferOffset);
404 bufferOffset += getImageMipLevelSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, m_format, mipLevelNdx, MEM_ALIGN_BUFFERIMAGECOPY_OFFSET);
408 // Start recording commands
409 beginCommandBuffer(deviceInterface, *commandBuffer);
411 const deUint32 imageSizeInBytes = getImageSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, m_format, imageSparseInfo.mipLevels, MEM_ALIGN_BUFFERIMAGECOPY_OFFSET);
412 const VkBufferCreateInfo inputBufferCreateInfo = makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
414 const de::UniquePtr<Buffer> inputBuffer(new Buffer(deviceInterface, *m_logicalDevice, *allocator, inputBufferCreateInfo, MemoryRequirement::HostVisible));
// Reference data: every byte of mip level N is set to N + 1, so each level has a distinct
// fill value.
416 std::vector<deUint8> referenceData(imageSizeInBytes);
418 for (deUint32 mipLevelNdx = 0u; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
420 const deUint32 mipLevelSizeInBytes = getImageMipLevelSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, m_format, mipLevelNdx);
421 const deUint32 bufferOffset = static_cast<deUint32>(bufferImageCopy[mipLevelNdx].bufferOffset);
423 deMemset(&referenceData[bufferOffset], mipLevelNdx + 1u, mipLevelSizeInBytes);
// Upload the reference data to the input buffer and make the host write visible.
426 deMemcpy(inputBuffer->getAllocation().getHostPtr(), &referenceData[0], imageSizeInBytes);
428 flushMappedMemoryRange(deviceInterface, *m_logicalDevice, inputBuffer->getAllocation().getMemory(), inputBuffer->getAllocation().getOffset(), imageSizeInBytes);
// Host write -> transfer read barrier on the input buffer.
431 const VkBufferMemoryBarrier inputBufferBarrier = makeBufferMemoryBarrier
433 VK_ACCESS_HOST_WRITE_BIT,
434 VK_ACCESS_TRANSFER_READ_BIT,
440 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &inputBufferBarrier, 0u, DE_NULL);
// Transition to TRANSFER_DST_OPTIMAL; queue family indices are only given when the sparse
// and compute queues belong to different families.
444 const VkImageMemoryBarrier imageSparseTransferDstBarrier = makeImageMemoryBarrier
447 VK_ACCESS_TRANSFER_WRITE_BIT,
448 VK_IMAGE_LAYOUT_UNDEFINED,
449 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
450 sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? sparseQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
451 sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? computeQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
453 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, imageSparseInfo.mipLevels, 0u, imageSparseInfo.arrayLayers)
456 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imageSparseTransferDstBarrier);
// Upload all mip levels of the reference data into imageRead.
459 deviceInterface.cmdCopyBufferToImage(*commandBuffer, inputBuffer->get(), *imageRead, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<deUint32>(bufferImageCopy.size()), &bufferImageCopy[0]);
// Transition from TRANSFER_DST_OPTIMAL to TRANSFER_SRC_OPTIMAL for the later read-back.
462 const VkImageMemoryBarrier imageSparseTransferSrcBarrier = makeImageMemoryBarrier
464 VK_ACCESS_TRANSFER_WRITE_BIT,
465 VK_ACCESS_TRANSFER_READ_BIT,
466 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
467 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
469 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, imageSparseInfo.mipLevels, 0u, imageSparseInfo.arrayLayers)
472 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imageSparseTransferSrcBarrier);
// Transition to GENERAL layout for shader writes.
476 const VkImageMemoryBarrier imageSparseShaderStorageBarrier = makeImageMemoryBarrier
479 VK_ACCESS_SHADER_WRITE_BIT,
480 VK_IMAGE_LAYOUT_UNDEFINED,
481 VK_IMAGE_LAYOUT_GENERAL,
483 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, imageSparseInfo.mipLevels, 0u, imageSparseInfo.arrayLayers)
486 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imageSparseShaderStorageBarrier);
489 // Create descriptor set layout
490 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
491 DescriptorSetLayoutBuilder()
492 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
493 .build(deviceInterface, *m_logicalDevice));
495 Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, *m_logicalDevice, *descriptorSetLayout));
// The pool is sized for one storage image descriptor set per mip level.
497 Unique<VkDescriptorPool> descriptorPool(
498 DescriptorPoolBuilder()
499 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, imageSparseInfo.mipLevels)
500 .build(deviceInterface, *m_logicalDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, imageSparseInfo.mipLevels));
// Per-mip-level image views, descriptor sets and pipelines are stored in vectors declared
// outside the loop, so they persist after each loop iteration ends.
502 typedef de::SharedPtr< Unique<VkImageView> > SharedVkImageView;
503 std::vector<SharedVkImageView> imageViews;
504 imageViews.resize(imageSparseInfo.mipLevels);
506 typedef de::SharedPtr< Unique<VkDescriptorSet> > SharedVkDescriptorSet;
507 std::vector<SharedVkDescriptorSet> descriptorSets;
508 descriptorSets.resize(imageSparseInfo.mipLevels);
510 typedef de::SharedPtr< Unique<VkPipeline> > SharedVkPipeline;
511 std::vector<SharedVkPipeline> computePipelines;
512 computePipelines.resize(imageSparseInfo.mipLevels);
// Record one dispatch per mip level, each using the shader named "comp<level>".
514 for (deUint32 mipLevelNdx = 0u; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
516 std::ostringstream name;
517 name << "comp" << mipLevelNdx;
519 // Create and bind compute pipeline
520 Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, *m_logicalDevice, m_context.getBinaryCollection().get(name.str()), DE_NULL));
522 computePipelines[mipLevelNdx] = makeVkSharedPtr(makeComputePipeline(deviceInterface, *m_logicalDevice, *pipelineLayout, *shaderModule));
523 VkPipeline computePipeline = **computePipelines[mipLevelNdx];
525 deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline);
527 // Create and bind descriptor set
528 descriptorSets[mipLevelNdx] = makeVkSharedPtr(makeDescriptorSet(deviceInterface, *m_logicalDevice, *descriptorPool, *descriptorSetLayout));
529 VkDescriptorSet descriptorSet = **descriptorSets[mipLevelNdx];
531 // Select which mipmap level to bind
532 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, mipLevelNdx, 1u, 0u, imageSparseInfo.arrayLayers);
// The shader stores through a view of imageWrite, not imageRead.
534 imageViews[mipLevelNdx] = makeVkSharedPtr(makeImageView(deviceInterface, *m_logicalDevice, *imageWrite, mapImageViewType(m_imageType), imageSparseInfo.format, subresourceRange));
535 VkImageView imageView = **imageViews[mipLevelNdx];
537 const VkDescriptorImageInfo sparseImageInfo = makeDescriptorImageInfo(DE_NULL, imageView, VK_IMAGE_LAYOUT_GENERAL);
539 DescriptorSetUpdateBuilder()
540 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &sparseImageInfo)
541 .update(deviceInterface, *m_logicalDevice);
543 deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
// Work group size: clamp each axis to the grid size and to maxWorkGroupSize, and keep the
// product within maxWorkGroupInvocations. initPrograms() uses the same computation for the
// shader's local size.
545 const tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize, mipLevelNdx);
546 const deUint32 xWorkGroupSize = std::min(std::min(gridSize.x(), maxWorkGroupSize.x()), maxWorkGroupInvocations);
547 const deUint32 yWorkGroupSize = std::min(std::min(gridSize.y(), maxWorkGroupSize.y()), maxWorkGroupInvocations / xWorkGroupSize);
548 const deUint32 zWorkGroupSize = std::min(std::min(gridSize.z(), maxWorkGroupSize.z()), maxWorkGroupInvocations / (xWorkGroupSize * yWorkGroupSize));
// Work group count: grid size divided by the work group size, rounded up.
550 const deUint32 xWorkGroupCount = gridSize.x() / xWorkGroupSize + (gridSize.x() % xWorkGroupSize ? 1u : 0u);
551 const deUint32 yWorkGroupCount = gridSize.y() / yWorkGroupSize + (gridSize.y() % yWorkGroupSize ? 1u : 0u);
552 const deUint32 zWorkGroupCount = gridSize.z() / zWorkGroupSize + (gridSize.z() % zWorkGroupSize ? 1u : 0u);
554 if (maxWorkGroupCount.x() < xWorkGroupCount ||
555 maxWorkGroupCount.y() < yWorkGroupCount ||
556 maxWorkGroupCount.z() < zWorkGroupCount)
557 TCU_THROW(NotSupportedError, "Image size is not supported");
559 deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
// Make the shader writes available to the transfer read that follows.
563 const VkMemoryBarrier memoryBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
565 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 1u, &memoryBarrier, 0u, DE_NULL, 0u, DE_NULL);
568 const VkBufferCreateInfo outputBufferCreateInfo = makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
569 const de::UniquePtr<Buffer> outputBuffer (new Buffer(deviceInterface, *m_logicalDevice, *allocator, outputBufferCreateInfo, MemoryRequirement::HostVisible));
// Read back from imageRead: for the aliased levels its contents are expected to be what was
// written through imageWrite.
571 deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *imageRead, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, outputBuffer->get(), static_cast<deUint32>(bufferImageCopy.size()), &bufferImageCopy[0]);
// Transfer write -> host read barrier on the output buffer.
574 const VkBufferMemoryBarrier outputBufferBarrier = makeBufferMemoryBarrier
576 VK_ACCESS_TRANSFER_WRITE_BIT,
577 VK_ACCESS_HOST_READ_BIT,
583 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier, 0u, DE_NULL);
586 // End recording commands
587 endCommandBuffer(deviceInterface, *commandBuffer);
// Wait stages for the two semaphores signalled by the sparse bind submission.
589 const VkPipelineStageFlags stageBits[] = { VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
591 // Submit commands for execution and wait for completion
592 submitCommandsAndWait(deviceInterface, *m_logicalDevice, computeQueue.queueHandle, *commandBuffer, 2u, imageMemoryBindSemaphores, stageBits);
594 // Retrieve data from buffer to host memory
595 const Allocation& allocation = outputBuffer->getAllocation();
596 invalidateMappedMemoryRange(deviceInterface, *m_logicalDevice, allocation.getMemory(), allocation.getOffset(), imageSizeInBytes);
598 const deUint8* outputData = static_cast<const deUint8*>(allocation.getHostPtr());
600 // Wait for sparse queue to become idle
// NOTE(review): the result of queueWaitIdle is not checked here.
601 deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
// Levels below the mip tail share memory between the two images, so the data read from
// imageRead must equal the pattern the shader stored: index % MODULO_DIVISOR in the first
// three channels and 1 in the fourth. Only the channels used by the format are compared.
603 for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
605 const tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize, mipLevelNdx);
606 const deUint32 bufferOffset = static_cast<deUint32>(bufferImageCopy[mipLevelNdx].bufferOffset);
607 const tcu::ConstPixelBufferAccess pixelBuffer = tcu::ConstPixelBufferAccess(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputData + bufferOffset);
609 for (deUint32 offsetZ = 0u; offsetZ < gridSize.z(); ++offsetZ)
610 for (deUint32 offsetY = 0u; offsetY < gridSize.y(); ++offsetY)
611 for (deUint32 offsetX = 0u; offsetX < gridSize.x(); ++offsetX)
// Same linear index formula as the one emitted into the shader by initPrograms().
613 const deUint32 index = offsetX + (offsetY + offsetZ * gridSize.y()) * gridSize.x();
614 const tcu::UVec4 referenceValue = tcu::UVec4(index % MODULO_DIVISOR, index % MODULO_DIVISOR, index % MODULO_DIVISOR, 1u);
615 const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
617 if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
618 return tcu::TestStatus::fail("Failed");
// Mip tail levels are bound to separate memory per image, so imageRead must still hold the
// reference data uploaded from the input buffer.
622 for (deUint32 mipLevelNdx = aspectRequirements.imageMipTailFirstLod; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
624 const deUint32 mipLevelSizeInBytes = getImageMipLevelSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, m_format, mipLevelNdx);
625 const deUint32 bufferOffset = static_cast<deUint32>(bufferImageCopy[mipLevelNdx].bufferOffset);
627 if (deMemCmp(outputData + bufferOffset, &referenceData[bufferOffset], mipLevelSizeInBytes) != 0)
628 return tcu::TestStatus::fail("Failed");
631 return tcu::TestStatus::pass("Passed");
// Generates one compute shader per mip level and registers it under the name "comp<level>".
// Each shader declares a writeonly storage image at binding 0 and, for every invocation
// inside the level's grid, stores (index % MODULO_DIVISOR) in the first three components and
// 1 in the fourth, where index is the linear invocation index within the grid.
// NOTE(review): the number of shaders is derived here from deFloatLog2 of the widest layer
// edge, whereas iterate() looks up one shader per level reported by getImageMaxMipLevels;
// confirm the two counts always agree.
634 void ImageSparseMemoryAliasingCase::initPrograms(SourceCollections& sourceCollections) const
636 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
637 const std::string imageTypeStr = getShaderImageType(m_format, m_imageType);
638 const std::string formatQualifierStr = getShaderImageFormatQualifier(m_format);
639 const std::string formatDataStr = getShaderImageDataType(m_format);
// Same work group limits as the ones used by iterate() when it computes dispatch sizes.
640 const deUint32 maxWorkGroupInvocations = 128u;
641 const tcu::UVec3 maxWorkGroupSize = tcu::UVec3(128u, 128u, 64u);
// Mip level count: deFloatLog2 of the widest layer edge, truncated to an integer, plus one.
643 const tcu::UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
644 const deUint32 widestEdge = std::max(std::max(layerSize.x(), layerSize.y()), layerSize.z());
645 const deUint32 mipLevels = static_cast<deUint32>(deFloatLog2(static_cast<float>(widestEdge))) + 1u;
647 for (deUint32 mipLevelNdx = 0; mipLevelNdx < mipLevels; ++mipLevelNdx)
649 // Create compute program
// Local size: clamp each axis to the grid size and to maxWorkGroupSize, and keep the product
// within maxWorkGroupInvocations.
650 const tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize, mipLevelNdx);
651 const deUint32 xWorkGroupSize = std::min(std::min(gridSize.x(), maxWorkGroupSize.x()), maxWorkGroupInvocations);
652 const deUint32 yWorkGroupSize = std::min(std::min(gridSize.y(), maxWorkGroupSize.y()), maxWorkGroupInvocations / xWorkGroupSize);
653 const deUint32 zWorkGroupSize = std::min(std::min(gridSize.z(), maxWorkGroupSize.z()), maxWorkGroupInvocations / (xWorkGroupSize * yWorkGroupSize));
655 std::ostringstream src;
// The bounds checks in the shader skip invocations that fall outside the grid when the
// dispatch is rounded up to whole work groups.
657 src << versionDecl << "\n"
658 << "layout (local_size_x = " << xWorkGroupSize << ", local_size_y = " << yWorkGroupSize << ", local_size_z = " << zWorkGroupSize << ") in; \n"
659 << "layout (binding = 0, " << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr << " u_image;\n"
660 << "void main (void)\n"
662 << " if( gl_GlobalInvocationID.x < " << gridSize.x() << " ) \n"
663 << " if( gl_GlobalInvocationID.y < " << gridSize.y() << " ) \n"
664 << " if( gl_GlobalInvocationID.z < " << gridSize.z() << " ) \n"
666 << " int index = int(gl_GlobalInvocationID.x + (gl_GlobalInvocationID.y + gl_GlobalInvocationID.z*" << gridSize.y() << ")*" << gridSize.x() << ");\n"
667 << " imageStore(u_image, " << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z") << ","
668 << formatDataStr << "( index % " << MODULO_DIVISOR << ", index % " << MODULO_DIVISOR << ", index % " << MODULO_DIVISOR << ", 1 )); \n"
// Register the shader under the name that iterate() looks up for this mip level.
672 std::ostringstream name;
673 name << "comp" << mipLevelNdx;
674 sourceCollections.glslSources.add(name.str()) << glu::ComputeSource(src.str());
// Creates the test instance for this case, passing on the stored image type, size and format.
// The instance is allocated with new and returned as a raw pointer.
678 TestInstance* ImageSparseMemoryAliasingCase::createInstance (Context& context) const
680 return new ImageSparseMemoryAliasingInstance(context, m_imageType, m_imageSize, m_format);
// Builds the "image_sparse_memory_aliasing" test group.
// The hierarchy is image type -> format -> image size: for every entry of
// imageParametersArray a group named after the image type is created, containing one group
// per format (named by its shader format qualifier), containing one test case per image size
// named "<x>_<y>_<z>". All cases use GLSL version 440.
// Returns the released root group pointer.
685 tcu::TestCaseGroup* createImageSparseMemoryAliasingTests (tcu::TestContext& testCtx)
687 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_memory_aliasing", "Sparse Image Memory Aliasing"));
// Number of image sizes tested for each image type.
689 static const deUint32 sizeCountPerImageType = 4u;
691 struct ImageParameters
694 tcu::UVec3 imageSizes[sizeCountPerImageType];
// Image sizes per image type; each list mixes power-of-two and non-power-of-two sizes.
697 static const ImageParameters imageParametersArray[] =
699 { IMAGE_TYPE_2D, { tcu::UVec3(512u, 256u, 1u), tcu::UVec3(128u, 128u, 1u), tcu::UVec3(503u, 137u, 1u), tcu::UVec3(11u, 37u, 1u) } },
700 { IMAGE_TYPE_2D_ARRAY, { tcu::UVec3(512u, 256u, 6u), tcu::UVec3(128u, 128u, 8u), tcu::UVec3(503u, 137u, 3u), tcu::UVec3(11u, 37u, 3u) } },
701 { IMAGE_TYPE_CUBE, { tcu::UVec3(256u, 256u, 1u), tcu::UVec3(128u, 128u, 1u), tcu::UVec3(137u, 137u, 1u), tcu::UVec3(11u, 11u, 1u) } },
702 { IMAGE_TYPE_CUBE_ARRAY,{ tcu::UVec3(256u, 256u, 6u), tcu::UVec3(128u, 128u, 8u), tcu::UVec3(137u, 137u, 3u), tcu::UVec3(11u, 11u, 3u) } },
703 { IMAGE_TYPE_3D, { tcu::UVec3(256u, 256u, 16u), tcu::UVec3(128u, 128u, 8u), tcu::UVec3(503u, 137u, 3u), tcu::UVec3(11u, 37u, 3u) } }
// Integer formats only: single-channel signed and four-channel unsigned, at 32, 16 and 8 bits.
706 static const tcu::TextureFormat formats[] =
708 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
709 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT16),
710 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT8),
711 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT32),
712 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT16),
713 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT8)
// Outer level: one group per image type.
716 for (deInt32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray); ++imageTypeNdx)
718 const ImageType imageType = imageParametersArray[imageTypeNdx].imageType;
719 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
// Middle level: one group per format.
721 for (deInt32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); ++formatNdx)
723 const tcu::TextureFormat& format = formats[formatNdx];
724 de::MovePtr<tcu::TestCaseGroup> formatGroup(new tcu::TestCaseGroup(testCtx, getShaderImageFormatQualifier(format).c_str(), ""));
// Inner level: one test case per image size.
726 for (deInt32 imageSizeNdx = 0; imageSizeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray[imageTypeNdx].imageSizes); ++imageSizeNdx)
728 const tcu::UVec3 imageSize = imageParametersArray[imageTypeNdx].imageSizes[imageSizeNdx];
730 std::ostringstream stream;
731 stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();
733 formatGroup->addChild(new ImageSparseMemoryAliasingCase(testCtx, stream.str(), "", imageType, imageSize, format, glu::GLSL_VERSION_440));
// release() hands the child group to addChild as a raw pointer.
735 imageTypeGroup->addChild(formatGroup.release());
737 testGroup->addChild(imageTypeGroup.release());
740 return testGroup.release();