Merge "CP: Split load/store image tests into sub-groups" into nougat-cts-dev
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / sparse_resources / vktSparseResourcesImageMemoryAliasing.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file  vktSparseResourcesImageMemoryAliasing.cpp
21  * \brief Sparse image memory aliasing tests
22  *//*--------------------------------------------------------------------*/
23
24 #include "vktSparseResourcesImageMemoryAliasing.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
28
29 #include "vkDefs.hpp"
30 #include "vkRef.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vkMemUtil.hpp"
36 #include "vkQueryUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkTypeUtil.hpp"
39
40 #include "deStringUtil.hpp"
41 #include "deUniquePtr.hpp"
42 #include "deSharedPtr.hpp"
43 #include "tcuTexture.hpp"
44
45 #include <deMath.h>
46 #include <string>
47 #include <vector>
48
49 using namespace vk;
50
51 namespace vkt
52 {
53 namespace sparse
54 {
55 namespace
56 {
57
//! Numeric constants associated with this test's shaders.
//! NOTE(review): MODULO_DIVISOR is not referenced in the visible part of this
//! file - presumably it is consumed by the shader source generation and/or the
//! result verification; confirm against initPrograms() and iterate().
enum ShaderParameters { MODULO_DIVISOR = 128 };
62
63 const std::string getCoordStr  (const ImageType         imageType,
64                                                                 const std::string&      x,
65                                                                 const std::string&      y,
66                                                                 const std::string&      z)
67 {
68         switch (imageType)
69         {
70                 case IMAGE_TYPE_1D:
71                 case IMAGE_TYPE_BUFFER:
72                         return x;
73
74                 case IMAGE_TYPE_1D_ARRAY:
75                 case IMAGE_TYPE_2D:
76                         return "ivec2(" + x + "," + y + ")";
77
78                 case IMAGE_TYPE_2D_ARRAY:
79                 case IMAGE_TYPE_3D:
80                 case IMAGE_TYPE_CUBE:
81                 case IMAGE_TYPE_CUBE_ARRAY:
82                         return "ivec3(" + x + "," + y + "," + z + ")";
83
84                 default:
85                         DE_ASSERT(false);
86                         return "";
87         }
88 }
89
90 tcu::UVec3 alignedDivide (const VkExtent3D& extent, const VkExtent3D& divisor)
91 {
92         tcu::UVec3 result;
93
94         result.x() = extent.width  / divisor.width  + ((extent.width  % divisor.width)  ? 1u : 0u);
95         result.y() = extent.height / divisor.height + ((extent.height % divisor.height) ? 1u : 0u);
96         result.z() = extent.depth  / divisor.depth  + ((extent.depth  % divisor.depth)  ? 1u : 0u);
97
98         return result;
99 }
100
101 class ImageSparseMemoryAliasingCase : public TestCase
102 {
103 public:
104                                         ImageSparseMemoryAliasingCase   (tcu::TestContext&                      testCtx,
105                                                                                                          const std::string&                     name,
106                                                                                                          const std::string&                     description,
107                                                                                                          const ImageType                        imageType,
108                                                                                                          const tcu::UVec3&                      imageSize,
109                                                                                                          const tcu::TextureFormat&      format,
110                                                                                                          const glu::GLSLVersion         glslVersion);
111
112         void                    initPrograms                                    (SourceCollections&                     sourceCollections) const;
113         TestInstance*   createInstance                                  (Context&                                       context) const;
114
115
116 private:
117         const ImageType                         m_imageType;
118         const tcu::UVec3                        m_imageSize;
119         const tcu::TextureFormat        m_format;
120         const glu::GLSLVersion          m_glslVersion;
121 };
122
123 ImageSparseMemoryAliasingCase::ImageSparseMemoryAliasingCase (tcu::TestContext&                 testCtx,
124                                                                                                                           const std::string&            name,
125                                                                                                                           const std::string&            description,
126                                                                                                                           const ImageType                       imageType,
127                                                                                                                           const tcu::UVec3&                     imageSize,
128                                                                                                                           const tcu::TextureFormat&     format,
129                                                                                                                           const glu::GLSLVersion        glslVersion)
130         : TestCase                              (testCtx, name, description)
131         , m_imageType                   (imageType)
132         , m_imageSize                   (imageSize)
133         , m_format                              (format)
134         , m_glslVersion                 (glslVersion)
135 {
136 }
137
138 class ImageSparseMemoryAliasingInstance : public SparseResourcesBaseInstance
139 {
140 public:
141                                         ImageSparseMemoryAliasingInstance       (Context&                                                               context,
142                                                                                                                  const ImageType                                                imageType,
143                                                                                                                  const tcu::UVec3&                                              imageSize,
144                                                                                                                  const tcu::TextureFormat&                              format);
145
146         tcu::TestStatus iterate                                                         (void);
147
148 private:
149         const ImageType                         m_imageType;
150         const tcu::UVec3                        m_imageSize;
151         const tcu::TextureFormat        m_format;
152 };
153
154 ImageSparseMemoryAliasingInstance::ImageSparseMemoryAliasingInstance (Context&                                  context,
155                                                                                                                                           const ImageType                       imageType,
156                                                                                                                                           const tcu::UVec3&                     imageSize,
157                                                                                                                                           const tcu::TextureFormat&     format)
158         : SparseResourcesBaseInstance   (context)
159         , m_imageType                                   (imageType)
160         , m_imageSize                                   (imageSize)
161         , m_format                                              (format)
162 {
163 }
164
165 tcu::TestStatus ImageSparseMemoryAliasingInstance::iterate (void)
166 {
167         const InstanceInterface&                        instance                                = m_context.getInstanceInterface();
168         const DeviceInterface&                          deviceInterface                 = m_context.getDeviceInterface();
169         const VkPhysicalDevice                          physicalDevice                  = m_context.getPhysicalDevice();
170         const tcu::UVec3                                        maxWorkGroupSize                = tcu::UVec3(128u, 128u, 64u);
171         const tcu::UVec3                                        maxWorkGroupCount               = tcu::UVec3(65535u, 65535u, 65535u);
172         const deUint32                                          maxWorkGroupInvocations = 128u;
173         VkImageCreateInfo                                       imageSparseInfo;
174         VkSparseImageMemoryRequirements         aspectRequirements;
175         std::vector<DeviceMemoryUniquePtr>      deviceMemUniquePtrVec;
176
177         // Check if image size does not exceed device limits
178         if (!isImageSizeSupported(instance, physicalDevice, m_imageType, m_imageSize))
179                 TCU_THROW(NotSupportedError, "Image size not supported for device");
180
181         // Check if sparse memory aliasing is supported
182         if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyAliased)
183                 TCU_THROW(NotSupportedError, "Sparse memory aliasing not supported");
184
185         // Check if device supports sparse operations for image type
186         if (!checkSparseSupportForImageType(instance, physicalDevice, m_imageType))
187                 TCU_THROW(NotSupportedError, "Sparse residency for image type is not supported");
188
189         imageSparseInfo.sType                                   = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
190         imageSparseInfo.pNext                                   = DE_NULL;
191         imageSparseInfo.flags                                   = VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
192                                                                                           VK_IMAGE_CREATE_SPARSE_ALIASED_BIT   |
193                                                                                           VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
194         imageSparseInfo.imageType                               = mapImageType(m_imageType);
195         imageSparseInfo.format                                  = mapTextureFormat(m_format);
196         imageSparseInfo.extent                                  = makeExtent3D(getLayerSize(m_imageType, m_imageSize));
197         imageSparseInfo.arrayLayers                             = getNumLayers(m_imageType, m_imageSize);
198         imageSparseInfo.samples                                 = VK_SAMPLE_COUNT_1_BIT;
199         imageSparseInfo.tiling                                  = VK_IMAGE_TILING_OPTIMAL;
200         imageSparseInfo.initialLayout                   = VK_IMAGE_LAYOUT_UNDEFINED;
201         imageSparseInfo.usage                                   = VK_IMAGE_USAGE_TRANSFER_DST_BIT |
202                                                                                           VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
203                                                                                           VK_IMAGE_USAGE_STORAGE_BIT;
204         imageSparseInfo.sharingMode                             = VK_SHARING_MODE_EXCLUSIVE;
205         imageSparseInfo.queueFamilyIndexCount   = 0u;
206         imageSparseInfo.pQueueFamilyIndices             = DE_NULL;
207
208         if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
209                 imageSparseInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
210
211         {
212                 // Assign maximum allowed mipmap levels to image
213                 VkImageFormatProperties imageFormatProperties;
214                 instance.getPhysicalDeviceImageFormatProperties(physicalDevice,
215                         imageSparseInfo.format,
216                         imageSparseInfo.imageType,
217                         imageSparseInfo.tiling,
218                         imageSparseInfo.usage,
219                         imageSparseInfo.flags,
220                         &imageFormatProperties);
221
222                 imageSparseInfo.mipLevels = getImageMaxMipLevels(imageFormatProperties, imageSparseInfo.extent);
223         }
224
225         // Check if device supports sparse operations for image format
226         if (!checkSparseSupportForImageFormat(instance, physicalDevice, imageSparseInfo))
227                 TCU_THROW(NotSupportedError, "The image format does not support sparse operations");
228
229         {
230                 // Create logical device supporting both sparse and compute queues
231                 QueueRequirementsVec queueRequirements;
232                 queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
233                 queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
234
235                 createDeviceSupportingQueues(queueRequirements);
236         }
237
238         const Queue& sparseQueue        = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
239         const Queue& computeQueue       = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
240
241         const de::UniquePtr<Allocator> allocator(new SimpleAllocator(deviceInterface, *m_logicalDevice, getPhysicalDeviceMemoryProperties(instance, physicalDevice)));
242
243         // Create sparse image
244         const Unique<VkImage> imageRead(createImage(deviceInterface, *m_logicalDevice, &imageSparseInfo));
245         const Unique<VkImage> imageWrite(createImage(deviceInterface, *m_logicalDevice, &imageSparseInfo));
246
247         // Create semaphores to synchronize sparse binding operations with other operations on the sparse images
248         const Unique<VkSemaphore> memoryBindSemaphoreTransfer(makeSemaphore(deviceInterface, *m_logicalDevice));
249         const Unique<VkSemaphore> memoryBindSemaphoreCompute(makeSemaphore(deviceInterface, *m_logicalDevice));
250
251         const VkSemaphore imageMemoryBindSemaphores[] = { memoryBindSemaphoreTransfer.get(), memoryBindSemaphoreCompute.get() };
252
253         {
254                 std::vector<VkSparseImageMemoryBind> imageResidencyMemoryBinds;
255                 std::vector<VkSparseMemoryBind>          imageReadMipTailBinds;
256                 std::vector<VkSparseMemoryBind>          imageWriteMipTailBinds;
257
258                 // Get sparse image general memory requirements
259                 const VkMemoryRequirements imageMemoryRequirements = getImageMemoryRequirements(deviceInterface, *m_logicalDevice, *imageRead);
260
261                 // Check if required image memory size does not exceed device limits
262                 if (imageMemoryRequirements.size > getPhysicalDeviceProperties(instance, physicalDevice).limits.sparseAddressSpaceSize)
263                         TCU_THROW(NotSupportedError, "Required memory size for sparse resource exceeds device limits");
264
265                 DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);
266
267                 // Get sparse image sparse memory requirements
268                 const std::vector<VkSparseImageMemoryRequirements> sparseMemoryRequirements = getImageSparseMemoryRequirements(deviceInterface, *m_logicalDevice, *imageRead);
269
270                 DE_ASSERT(sparseMemoryRequirements.size() != 0);
271
272                 const deUint32 colorAspectIndex = getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_COLOR_BIT);
273
274                 if (colorAspectIndex == NO_MATCH_FOUND)
275                         TCU_THROW(NotSupportedError, "Not supported image aspect - the test supports currently only VK_IMAGE_ASPECT_COLOR_BIT");
276
277                 aspectRequirements = sparseMemoryRequirements[colorAspectIndex];
278
279                 const VkImageAspectFlags        aspectMask                      = aspectRequirements.formatProperties.aspectMask;
280                 const VkExtent3D                        imageGranularity        = aspectRequirements.formatProperties.imageGranularity;
281
282                 DE_ASSERT((aspectRequirements.imageMipTailSize % imageMemoryRequirements.alignment) == 0);
283
284                 const deUint32 memoryType = findMatchingMemoryType(instance, physicalDevice, imageMemoryRequirements, MemoryRequirement::Any);
285
286                 if (memoryType == NO_MATCH_FOUND)
287                         return tcu::TestStatus::fail("No matching memory type found");
288
289                 // Bind memory for each layer
290                 for (deUint32 layerNdx = 0; layerNdx < imageSparseInfo.arrayLayers; ++layerNdx)
291                 {
292                         for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
293                         {
294                                 const VkExtent3D                        mipExtent               = mipLevelExtents(imageSparseInfo.extent, mipLevelNdx);
295                                 const tcu::UVec3                        sparseBlocks    = alignedDivide(mipExtent, imageGranularity);
296                                 const deUint32                          numSparseBlocks = sparseBlocks.x() * sparseBlocks.y() * sparseBlocks.z();
297                                 const VkImageSubresource        subresource             = { aspectMask, mipLevelNdx, layerNdx };
298
299                                 const VkSparseImageMemoryBind imageMemoryBind = makeSparseImageMemoryBind(deviceInterface, *m_logicalDevice,
300                                         imageMemoryRequirements.alignment * numSparseBlocks, memoryType, subresource, makeOffset3D(0u, 0u, 0u), mipExtent);
301
302                                 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
303
304                                 imageResidencyMemoryBinds.push_back(imageMemoryBind);
305                         }
306
307                         if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageSparseInfo.mipLevels)
308                         {
309                                 const VkSparseMemoryBind imageReadMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, *m_logicalDevice,
310                                         aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);
311
312                                 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageReadMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
313
314                                 imageReadMipTailBinds.push_back(imageReadMipTailMemoryBind);
315
316                                 const VkSparseMemoryBind imageWriteMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, *m_logicalDevice,
317                                         aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);
318
319                                 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageWriteMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
320
321                                 imageWriteMipTailBinds.push_back(imageWriteMipTailMemoryBind);
322                         }
323                 }
324
325                 if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageSparseInfo.mipLevels)
326                 {
327                         const VkSparseMemoryBind imageReadMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, *m_logicalDevice,
328                                 aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);
329
330                         deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageReadMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
331
332                         imageReadMipTailBinds.push_back(imageReadMipTailMemoryBind);
333
334                         const VkSparseMemoryBind imageWriteMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, *m_logicalDevice,
335                                 aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);
336
337                         deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageWriteMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
338
339                         imageWriteMipTailBinds.push_back(imageWriteMipTailMemoryBind);
340                 }
341
342                 VkBindSparseInfo bindSparseInfo =
343                 {
344                         VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,             //VkStructureType                                                       sType;
345                         DE_NULL,                                                                //const void*                                                           pNext;
346                         0u,                                                                             //deUint32                                                                      waitSemaphoreCount;
347                         DE_NULL,                                                                //const VkSemaphore*                                            pWaitSemaphores;
348                         0u,                                                                             //deUint32                                                                      bufferBindCount;
349                         DE_NULL,                                                                //const VkSparseBufferMemoryBindInfo*           pBufferBinds;
350                         0u,                                                                             //deUint32                                                                      imageOpaqueBindCount;
351                         DE_NULL,                                                                //const VkSparseImageOpaqueMemoryBindInfo*      pImageOpaqueBinds;
352                         0u,                                                                             //deUint32                                                                      imageBindCount;
353                         DE_NULL,                                                                //const VkSparseImageMemoryBindInfo*            pImageBinds;
354                         2u,                                                                             //deUint32                                                                      signalSemaphoreCount;
355                         imageMemoryBindSemaphores                               //const VkSemaphore*                                            pSignalSemaphores;
356                 };
357
358                 VkSparseImageMemoryBindInfo               imageResidencyBindInfo[2];
359                 VkSparseImageOpaqueMemoryBindInfo imageMipTailBindInfo[2];
360
361                 if (imageResidencyMemoryBinds.size() > 0)
362                 {
363                         imageResidencyBindInfo[0].image         = *imageRead;
364                         imageResidencyBindInfo[0].bindCount = static_cast<deUint32>(imageResidencyMemoryBinds.size());
365                         imageResidencyBindInfo[0].pBinds        = &imageResidencyMemoryBinds[0];
366
367                         imageResidencyBindInfo[1].image         = *imageWrite;
368                         imageResidencyBindInfo[1].bindCount = static_cast<deUint32>(imageResidencyMemoryBinds.size());
369                         imageResidencyBindInfo[1].pBinds        = &imageResidencyMemoryBinds[0];
370
371                         bindSparseInfo.imageBindCount           = 2u;
372                         bindSparseInfo.pImageBinds                      = imageResidencyBindInfo;
373                 }
374
375                 if (imageReadMipTailBinds.size() > 0)
376                 {
377                         imageMipTailBindInfo[0].image           = *imageRead;
378                         imageMipTailBindInfo[0].bindCount       = static_cast<deUint32>(imageReadMipTailBinds.size());
379                         imageMipTailBindInfo[0].pBinds          = &imageReadMipTailBinds[0];
380
381                         imageMipTailBindInfo[1].image           = *imageWrite;
382                         imageMipTailBindInfo[1].bindCount       = static_cast<deUint32>(imageWriteMipTailBinds.size());
383                         imageMipTailBindInfo[1].pBinds          = &imageWriteMipTailBinds[0];
384
385                         bindSparseInfo.imageOpaqueBindCount = 2u;
386                         bindSparseInfo.pImageOpaqueBinds        = imageMipTailBindInfo;
387                 }
388
389                 // Submit sparse bind commands for execution
390                 VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
391         }
392
393         // Create command buffer for compute and transfer operations
394         const Unique<VkCommandPool>       commandPool  (makeCommandPool(deviceInterface, *m_logicalDevice, computeQueue.queueFamilyIndex));
395         const Unique<VkCommandBuffer> commandBuffer(makeCommandBuffer(deviceInterface, *m_logicalDevice, *commandPool));
396
397         std::vector<VkBufferImageCopy> bufferImageCopy(imageSparseInfo.mipLevels);
398
399         {
400                 deUint32 bufferOffset = 0u;
401                 for (deUint32 mipLevelNdx = 0u; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
402                 {
403                         bufferImageCopy[mipLevelNdx] = makeBufferImageCopy(mipLevelExtents(imageSparseInfo.extent, mipLevelNdx), imageSparseInfo.arrayLayers, mipLevelNdx, bufferOffset);
404                         bufferOffset += getImageMipLevelSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, m_format, mipLevelNdx, MEM_ALIGN_BUFFERIMAGECOPY_OFFSET);
405                 }
406         }
407
408         // Start recording commands
409         beginCommandBuffer(deviceInterface, *commandBuffer);
410
411         const deUint32                          imageSizeInBytes                = getImageSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, m_format, imageSparseInfo.mipLevels, MEM_ALIGN_BUFFERIMAGECOPY_OFFSET);
412         const VkBufferCreateInfo        inputBufferCreateInfo   = makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
413
414         const de::UniquePtr<Buffer>     inputBuffer(new Buffer(deviceInterface, *m_logicalDevice, *allocator, inputBufferCreateInfo, MemoryRequirement::HostVisible));
415
416         std::vector<deUint8> referenceData(imageSizeInBytes);
417
418         for (deUint32 mipLevelNdx = 0u; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
419         {
420                 const deUint32 mipLevelSizeInBytes      = getImageMipLevelSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, m_format, mipLevelNdx);
421                 const deUint32 bufferOffset                     = static_cast<deUint32>(bufferImageCopy[mipLevelNdx].bufferOffset);
422
423                 deMemset(&referenceData[bufferOffset], mipLevelNdx + 1u, mipLevelSizeInBytes);
424         }
425
426         deMemcpy(inputBuffer->getAllocation().getHostPtr(), &referenceData[0], imageSizeInBytes);
427
428         flushMappedMemoryRange(deviceInterface, *m_logicalDevice, inputBuffer->getAllocation().getMemory(), inputBuffer->getAllocation().getOffset(), imageSizeInBytes);
429
430         {
431                 const VkBufferMemoryBarrier inputBufferBarrier = makeBufferMemoryBarrier
432                 (
433                         VK_ACCESS_HOST_WRITE_BIT,
434                         VK_ACCESS_TRANSFER_READ_BIT,
435                         inputBuffer->get(),
436                         0u,
437                         imageSizeInBytes
438                 );
439
440                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &inputBufferBarrier, 0u, DE_NULL);
441         }
442
443         {
444                 const VkImageMemoryBarrier imageSparseTransferDstBarrier = makeImageMemoryBarrier
445                 (
446                         0u,
447                         VK_ACCESS_TRANSFER_WRITE_BIT,
448                         VK_IMAGE_LAYOUT_UNDEFINED,
449                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
450                         sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? sparseQueue.queueFamilyIndex  : VK_QUEUE_FAMILY_IGNORED,
451                         sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? computeQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
452                         *imageRead,
453                         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, imageSparseInfo.mipLevels, 0u, imageSparseInfo.arrayLayers)
454                 );
455
456                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imageSparseTransferDstBarrier);
457         }
458
459         deviceInterface.cmdCopyBufferToImage(*commandBuffer, inputBuffer->get(), *imageRead, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<deUint32>(bufferImageCopy.size()), &bufferImageCopy[0]);
460
461         {
462                 const VkImageMemoryBarrier imageSparseTransferSrcBarrier = makeImageMemoryBarrier
463                 (
464                         VK_ACCESS_TRANSFER_WRITE_BIT,
465                         VK_ACCESS_TRANSFER_READ_BIT,
466                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
467                         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
468                         *imageRead,
469                         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, imageSparseInfo.mipLevels, 0u, imageSparseInfo.arrayLayers)
470                 );
471
472                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imageSparseTransferSrcBarrier);
473         }
474
475         {
476                 const VkImageMemoryBarrier imageSparseShaderStorageBarrier = makeImageMemoryBarrier
477                 (
478                         0u,
479                         VK_ACCESS_SHADER_WRITE_BIT,
480                         VK_IMAGE_LAYOUT_UNDEFINED,
481                         VK_IMAGE_LAYOUT_GENERAL,
482                         *imageWrite,
483                         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, imageSparseInfo.mipLevels, 0u, imageSparseInfo.arrayLayers)
484                 );
485
486                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imageSparseShaderStorageBarrier);
487         }
488
489         // Create descriptor set layout
490         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
491                 DescriptorSetLayoutBuilder()
492                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
493                 .build(deviceInterface, *m_logicalDevice));
494
495         Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, *m_logicalDevice, *descriptorSetLayout));
496
497         Unique<VkDescriptorPool> descriptorPool(
498                 DescriptorPoolBuilder()
499                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, imageSparseInfo.mipLevels)
500                 .build(deviceInterface, *m_logicalDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, imageSparseInfo.mipLevels));
501
502         typedef de::SharedPtr< Unique<VkImageView> >            SharedVkImageView;
503         std::vector<SharedVkImageView>                                          imageViews;
504         imageViews.resize(imageSparseInfo.mipLevels);
505
506         typedef de::SharedPtr< Unique<VkDescriptorSet> >        SharedVkDescriptorSet;
507         std::vector<SharedVkDescriptorSet>                                      descriptorSets;
508         descriptorSets.resize(imageSparseInfo.mipLevels);
509
510         typedef de::SharedPtr< Unique<VkPipeline> >                     SharedVkPipeline;
511         std::vector<SharedVkPipeline>                                           computePipelines;
512         computePipelines.resize(imageSparseInfo.mipLevels);
513
514         for (deUint32 mipLevelNdx = 0u; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
515         {
516                 std::ostringstream name;
517                 name << "comp" << mipLevelNdx;
518
519                 // Create and bind compute pipeline
520                 Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, *m_logicalDevice, m_context.getBinaryCollection().get(name.str()), DE_NULL));
521
522                 computePipelines[mipLevelNdx]   = makeVkSharedPtr(makeComputePipeline(deviceInterface, *m_logicalDevice, *pipelineLayout, *shaderModule));
523                 VkPipeline computePipeline              = **computePipelines[mipLevelNdx];
524
525                 deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline);
526
527                 // Create and bind descriptor set
528                 descriptorSets[mipLevelNdx]             = makeVkSharedPtr(makeDescriptorSet(deviceInterface, *m_logicalDevice, *descriptorPool, *descriptorSetLayout));
529                 VkDescriptorSet descriptorSet   = **descriptorSets[mipLevelNdx];
530
531                 // Select which mipmap level to bind
532                 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, mipLevelNdx, 1u, 0u, imageSparseInfo.arrayLayers);
533
534                 imageViews[mipLevelNdx] = makeVkSharedPtr(makeImageView(deviceInterface, *m_logicalDevice, *imageWrite, mapImageViewType(m_imageType), imageSparseInfo.format, subresourceRange));
535                 VkImageView imageView   = **imageViews[mipLevelNdx];
536
537                 const VkDescriptorImageInfo sparseImageInfo = makeDescriptorImageInfo(DE_NULL, imageView, VK_IMAGE_LAYOUT_GENERAL);
538
539                 DescriptorSetUpdateBuilder()
540                         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &sparseImageInfo)
541                         .update(deviceInterface, *m_logicalDevice);
542
543                 deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
544
545                 const tcu::UVec3        gridSize                        = getShaderGridSize(m_imageType, m_imageSize, mipLevelNdx);
546                 const deUint32          xWorkGroupSize          = std::min(std::min(gridSize.x(), maxWorkGroupSize.x()), maxWorkGroupInvocations);
547                 const deUint32          yWorkGroupSize          = std::min(std::min(gridSize.y(), maxWorkGroupSize.y()), maxWorkGroupInvocations / xWorkGroupSize);
548                 const deUint32          zWorkGroupSize          = std::min(std::min(gridSize.z(), maxWorkGroupSize.z()), maxWorkGroupInvocations / (xWorkGroupSize * yWorkGroupSize));
549
550                 const deUint32          xWorkGroupCount         = gridSize.x() / xWorkGroupSize + (gridSize.x() % xWorkGroupSize ? 1u : 0u);
551                 const deUint32          yWorkGroupCount         = gridSize.y() / yWorkGroupSize + (gridSize.y() % yWorkGroupSize ? 1u : 0u);
552                 const deUint32          zWorkGroupCount         = gridSize.z() / zWorkGroupSize + (gridSize.z() % zWorkGroupSize ? 1u : 0u);
553
554                 if (maxWorkGroupCount.x() < xWorkGroupCount ||
555                         maxWorkGroupCount.y() < yWorkGroupCount ||
556                         maxWorkGroupCount.z() < zWorkGroupCount)
557                         TCU_THROW(NotSupportedError, "Image size is not supported");
558
559                 deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
560         }
561
562         {
563                 const VkMemoryBarrier memoryBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
564
565                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 1u, &memoryBarrier, 0u, DE_NULL, 0u, DE_NULL);
566         }
567
568         const VkBufferCreateInfo        outputBufferCreateInfo  = makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
569         const de::UniquePtr<Buffer>     outputBuffer                    (new Buffer(deviceInterface, *m_logicalDevice, *allocator, outputBufferCreateInfo, MemoryRequirement::HostVisible));
570
571         deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *imageRead, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, outputBuffer->get(), static_cast<deUint32>(bufferImageCopy.size()), &bufferImageCopy[0]);
572
573         {
574                 const VkBufferMemoryBarrier outputBufferBarrier = makeBufferMemoryBarrier
575                 (
576                         VK_ACCESS_TRANSFER_WRITE_BIT,
577                         VK_ACCESS_HOST_READ_BIT,
578                         outputBuffer->get(),
579                         0u,
580                         imageSizeInBytes
581                 );
582
583                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier, 0u, DE_NULL);
584         }
585
586         // End recording commands
587         endCommandBuffer(deviceInterface, *commandBuffer);
588
589         const VkPipelineStageFlags stageBits[] = { VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
590
591         // Submit commands for execution and wait for completion
592         submitCommandsAndWait(deviceInterface, *m_logicalDevice, computeQueue.queueHandle, *commandBuffer, 2u, imageMemoryBindSemaphores, stageBits);
593
594         // Retrieve data from buffer to host memory
595         const Allocation& allocation = outputBuffer->getAllocation();
596         invalidateMappedMemoryRange(deviceInterface, *m_logicalDevice, allocation.getMemory(), allocation.getOffset(), imageSizeInBytes);
597
598         const deUint8* outputData = static_cast<const deUint8*>(allocation.getHostPtr());
599
600         // Wait for sparse queue to become idle
601         deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
602
603         for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
604         {
605                 const tcu::UVec3                                  gridSize              = getShaderGridSize(m_imageType, m_imageSize, mipLevelNdx);
606                 const deUint32                                    bufferOffset  = static_cast<deUint32>(bufferImageCopy[mipLevelNdx].bufferOffset);
607                 const tcu::ConstPixelBufferAccess pixelBuffer   = tcu::ConstPixelBufferAccess(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputData + bufferOffset);
608
609                 for (deUint32 offsetZ = 0u; offsetZ < gridSize.z(); ++offsetZ)
610                 for (deUint32 offsetY = 0u; offsetY < gridSize.y(); ++offsetY)
611                 for (deUint32 offsetX = 0u; offsetX < gridSize.x(); ++offsetX)
612                 {
613                         const deUint32 index                    = offsetX + (offsetY + offsetZ * gridSize.y()) * gridSize.x();
614                         const tcu::UVec4 referenceValue = tcu::UVec4(index % MODULO_DIVISOR, index % MODULO_DIVISOR, index % MODULO_DIVISOR, 1u);
615                         const tcu::UVec4 outputValue    = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
616
617                         if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
618                                 return tcu::TestStatus::fail("Failed");
619                 }
620         }
621
622         for (deUint32 mipLevelNdx = aspectRequirements.imageMipTailFirstLod; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
623         {
624                 const deUint32 mipLevelSizeInBytes      = getImageMipLevelSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, m_format, mipLevelNdx);
625                 const deUint32 bufferOffset                     = static_cast<deUint32>(bufferImageCopy[mipLevelNdx].bufferOffset);
626
627                 if (deMemCmp(outputData + bufferOffset, &referenceData[bufferOffset], mipLevelSizeInBytes) != 0)
628                         return tcu::TestStatus::fail("Failed");
629         }
630
631         return tcu::TestStatus::pass("Passed");
632 }
633
634 void ImageSparseMemoryAliasingCase::initPrograms(SourceCollections&     sourceCollections) const
635 {
636         const char* const       versionDecl                             = glu::getGLSLVersionDeclaration(m_glslVersion);
637         const std::string       imageTypeStr                    = getShaderImageType(m_format, m_imageType);
638         const std::string       formatQualifierStr              = getShaderImageFormatQualifier(m_format);
639         const std::string       formatDataStr                   = getShaderImageDataType(m_format);
640         const deUint32          maxWorkGroupInvocations = 128u;
641         const tcu::UVec3        maxWorkGroupSize                = tcu::UVec3(128u, 128u, 64u);
642
643         const tcu::UVec3        layerSize                               = getLayerSize(m_imageType, m_imageSize);
644         const deUint32          widestEdge                              = std::max(std::max(layerSize.x(), layerSize.y()), layerSize.z());
645         const deUint32          mipLevels                               = static_cast<deUint32>(deFloatLog2(static_cast<float>(widestEdge))) + 1u;
646
647         for (deUint32 mipLevelNdx = 0; mipLevelNdx < mipLevels; ++mipLevelNdx)
648         {
649                 // Create compute program
650                 const tcu::UVec3        gridSize                = getShaderGridSize(m_imageType, m_imageSize, mipLevelNdx);
651                 const deUint32          xWorkGroupSize  = std::min(std::min(gridSize.x(), maxWorkGroupSize.x()), maxWorkGroupInvocations);
652                 const deUint32          yWorkGroupSize  = std::min(std::min(gridSize.y(), maxWorkGroupSize.y()), maxWorkGroupInvocations / xWorkGroupSize);
653                 const deUint32          zWorkGroupSize  = std::min(std::min(gridSize.z(), maxWorkGroupSize.z()), maxWorkGroupInvocations / (xWorkGroupSize * yWorkGroupSize));
654
655                 std::ostringstream src;
656
657                 src << versionDecl << "\n"
658                         << "layout (local_size_x = " << xWorkGroupSize << ", local_size_y = " << yWorkGroupSize << ", local_size_z = " << zWorkGroupSize << ") in; \n"
659                         << "layout (binding = 0, " << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr << " u_image;\n"
660                         << "void main (void)\n"
661                         << "{\n"
662                         << "    if( gl_GlobalInvocationID.x < " << gridSize.x() << " ) \n"
663                         << "    if( gl_GlobalInvocationID.y < " << gridSize.y() << " ) \n"
664                         << "    if( gl_GlobalInvocationID.z < " << gridSize.z() << " ) \n"
665                         << "    {\n"
666                         << "            int index = int(gl_GlobalInvocationID.x + (gl_GlobalInvocationID.y + gl_GlobalInvocationID.z*" << gridSize.y() << ")*" << gridSize.x() << ");\n"
667                         << "            imageStore(u_image, " << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z") << ","
668                         << formatDataStr << "( index % " << MODULO_DIVISOR << ", index % " << MODULO_DIVISOR << ", index % " << MODULO_DIVISOR << ", 1 )); \n"
669                         << "    }\n"
670                         << "}\n";
671
672                 std::ostringstream name;
673                 name << "comp" << mipLevelNdx;
674                 sourceCollections.glslSources.add(name.str()) << glu::ComputeSource(src.str());
675         }
676 }
677
678 TestInstance* ImageSparseMemoryAliasingCase::createInstance (Context& context) const
679 {
680         return new ImageSparseMemoryAliasingInstance(context, m_imageType, m_imageSize, m_format);
681 }
682
683 } // anonymous ns
684
685 tcu::TestCaseGroup* createImageSparseMemoryAliasingTests (tcu::TestContext& testCtx)
686 {
687         de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_memory_aliasing", "Sparse Image Memory Aliasing"));
688
689         static const deUint32 sizeCountPerImageType = 4u;
690
691         struct ImageParameters
692         {
693                 ImageType       imageType;
694                 tcu::UVec3      imageSizes[sizeCountPerImageType];
695         };
696
697         static const ImageParameters imageParametersArray[] =
698         {
699                 { IMAGE_TYPE_2D,                { tcu::UVec3(512u, 256u, 1u),   tcu::UVec3(128u, 128u, 1u),     tcu::UVec3(503u, 137u, 1u),     tcu::UVec3(11u, 37u, 1u) } },
700                 { IMAGE_TYPE_2D_ARRAY,  { tcu::UVec3(512u, 256u, 6u),   tcu::UVec3(128u, 128u, 8u),     tcu::UVec3(503u, 137u, 3u),     tcu::UVec3(11u, 37u, 3u) } },
701                 { IMAGE_TYPE_CUBE,              { tcu::UVec3(256u, 256u, 1u),   tcu::UVec3(128u, 128u, 1u),     tcu::UVec3(137u, 137u, 1u),     tcu::UVec3(11u, 11u, 1u) } },
702                 { IMAGE_TYPE_CUBE_ARRAY,{ tcu::UVec3(256u, 256u, 6u),   tcu::UVec3(128u, 128u, 8u),     tcu::UVec3(137u, 137u, 3u),     tcu::UVec3(11u, 11u, 3u) } },
703                 { IMAGE_TYPE_3D,                { tcu::UVec3(256u, 256u, 16u),  tcu::UVec3(128u, 128u, 8u),     tcu::UVec3(503u, 137u, 3u),     tcu::UVec3(11u, 37u, 3u) } }
704         };
705
706         static const tcu::TextureFormat formats[] =
707         {
708                 tcu::TextureFormat(tcu::TextureFormat::R,        tcu::TextureFormat::SIGNED_INT32),
709                 tcu::TextureFormat(tcu::TextureFormat::R,        tcu::TextureFormat::SIGNED_INT16),
710                 tcu::TextureFormat(tcu::TextureFormat::R,        tcu::TextureFormat::SIGNED_INT8),
711                 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT32),
712                 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT16),
713                 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT8)
714         };
715
716         for (deInt32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray); ++imageTypeNdx)
717         {
718                 const ImageType                                 imageType = imageParametersArray[imageTypeNdx].imageType;
719                 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
720
721                 for (deInt32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); ++formatNdx)
722                 {
723                         const tcu::TextureFormat&               format = formats[formatNdx];
724                         de::MovePtr<tcu::TestCaseGroup> formatGroup(new tcu::TestCaseGroup(testCtx, getShaderImageFormatQualifier(format).c_str(), ""));
725
726                         for (deInt32 imageSizeNdx = 0; imageSizeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray[imageTypeNdx].imageSizes); ++imageSizeNdx)
727                         {
728                                 const tcu::UVec3 imageSize = imageParametersArray[imageTypeNdx].imageSizes[imageSizeNdx];
729
730                                 std::ostringstream stream;
731                                 stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();
732
733                                 formatGroup->addChild(new ImageSparseMemoryAliasingCase(testCtx, stream.str(), "", imageType, imageSize, format, glu::GLSL_VERSION_440));
734                         }
735                         imageTypeGroup->addChild(formatGroup.release());
736                 }
737                 testGroup->addChild(imageTypeGroup.release());
738         }
739
740         return testGroup.release();
741 }
742
743 } // sparse
744 } // vkt