Improve GLSL source program support
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / sparse_resources / vktSparseResourcesImageSparseResidency.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file  vktSparseResourcesImageSparseResidency.cpp
21  * \brief Sparse partially resident images tests
22  *//*--------------------------------------------------------------------*/
23
24 #include "vktSparseResourcesBufferSparseBinding.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
28
29 #include "vkDefs.hpp"
30 #include "vkRef.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkMemUtil.hpp"
35 #include "vkBuilderUtil.hpp"
36 #include "vkImageUtil.hpp"
37 #include "vkQueryUtil.hpp"
38 #include "vkTypeUtil.hpp"
39
40 #include "deUniquePtr.hpp"
41 #include "deStringUtil.hpp"
42
43 #include <string>
44 #include <vector>
45
46 using namespace vk;
47
48 namespace vkt
49 {
50 namespace sparse
51 {
52 namespace
53 {
54
55 const std::string getCoordStr  (const ImageType         imageType,
56                                                                 const std::string&      x,
57                                                                 const std::string&      y,
58                                                                 const std::string&      z)
59 {
60         switch (imageType)
61         {
62                 case IMAGE_TYPE_1D:
63                 case IMAGE_TYPE_BUFFER:
64                         return x;
65
66                 case IMAGE_TYPE_1D_ARRAY:
67                 case IMAGE_TYPE_2D:
68                         return "ivec2(" + x + "," + y + ")";
69
70                 case IMAGE_TYPE_2D_ARRAY:
71                 case IMAGE_TYPE_3D:
72                 case IMAGE_TYPE_CUBE:
73                 case IMAGE_TYPE_CUBE_ARRAY:
74                         return "ivec3(" + x + "," + y + "," + z + ")";
75
76                 default:
77                         DE_ASSERT(false);
78                         return "";
79         }
80 }
81
82 tcu::UVec3 alignedDivide (const VkExtent3D& extent, const VkExtent3D& divisor)
83 {
84         tcu::UVec3 result;
85
86         result.x() = extent.width  / divisor.width  + ((extent.width  % divisor.width)  ? 1u : 0u);
87         result.y() = extent.height / divisor.height + ((extent.height % divisor.height) ? 1u : 0u);
88         result.z() = extent.depth  / divisor.depth  + ((extent.depth  % divisor.depth)  ? 1u : 0u);
89
90         return result;
91 }
92
93 tcu::UVec3 computeWorkGroupSize (const tcu::UVec3& gridSize)
94 {
95         const deUint32          maxComputeWorkGroupInvocations  = 128u;
96         const tcu::UVec3        maxComputeWorkGroupSize                 = tcu::UVec3(128u, 128u, 64u);
97
98         const deUint32 xWorkGroupSize = std::min(std::min(gridSize.x(), maxComputeWorkGroupSize.x()), maxComputeWorkGroupInvocations);
99         const deUint32 yWorkGroupSize = std::min(std::min(gridSize.y(), maxComputeWorkGroupSize.y()), maxComputeWorkGroupInvocations /  xWorkGroupSize);
100         const deUint32 zWorkGroupSize = std::min(std::min(gridSize.z(), maxComputeWorkGroupSize.z()), maxComputeWorkGroupInvocations / (xWorkGroupSize*yWorkGroupSize));
101
102         return tcu::UVec3(xWorkGroupSize, yWorkGroupSize, zWorkGroupSize);
103 }
104
// Test case for sparse partially-resident images: generates a compute shader
// that writes a per-texel pattern into a sparsely bound storage image; the
// instance (created via createInstance) binds memory and verifies the result.
class ImageSparseResidencyCase : public TestCase
{
public:
					// Captures the image type/size/format and GLSL version used
					// later when building the shader and the test instance.
					ImageSparseResidencyCase	(tcu::TestContext&			testCtx,
												 const std::string&			name,
												 const std::string&			description,
												 const ImageType			imageType,
												 const tcu::UVec3&			imageSize,
												 const tcu::TextureFormat&	format,
												 const glu::GLSLVersion		glslVersion);

	// Emits the "comp" compute shader that fills the image.
	void			initPrograms				(SourceCollections&			sourceCollections) const;
	TestInstance*	createInstance				(Context&					context) const;

private:
	const ImageType				m_imageType;	// logical image type (1D/2D/3D/cube/array/buffer)
	const tcu::UVec3			m_imageSize;	// full size incl. layer count encoding
	const tcu::TextureFormat	m_format;		// texel format under test
	const glu::GLSLVersion		m_glslVersion;	// GLSL version for the generated shader
};
125
// Stores the test parameters; all real work happens in initPrograms() and in
// the instance returned by createInstance().
ImageSparseResidencyCase::ImageSparseResidencyCase (tcu::TestContext&			testCtx,
													const std::string&			name,
													const std::string&			description,
													const ImageType				imageType,
													const tcu::UVec3&			imageSize,
													const tcu::TextureFormat&	format,
													const glu::GLSLVersion		glslVersion)
	: TestCase				(testCtx, name, description)
	, m_imageType			(imageType)
	, m_imageSize			(imageSize)
	, m_format				(format)
	, m_glslVersion			(glslVersion)
{
}
140
141 void ImageSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const
142 {
143         // Create compute program
144         const char* const versionDecl                   = glu::getGLSLVersionDeclaration(m_glslVersion);
145         const std::string imageTypeStr                  = getShaderImageType(m_format, m_imageType);
146         const std::string formatQualifierStr    = getShaderImageFormatQualifier(m_format);
147         const std::string formatDataStr                 = getShaderImageDataType(m_format);
148         const tcu::UVec3  gridSize                              = getShaderGridSize(m_imageType, m_imageSize);
149         const tcu::UVec3  workGroupSize                 = computeWorkGroupSize(gridSize);
150
151         std::ostringstream src;
152         src << versionDecl << "\n"
153                 << "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y() << ", local_size_z = " << workGroupSize.z() << ") in; \n"
154                 << "layout (binding = 0, " << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr << " u_image;\n"
155                 << "void main (void)\n"
156                 << "{\n"
157                 << "    if( gl_GlobalInvocationID.x < " << gridSize.x() << " ) \n"
158                 << "    if( gl_GlobalInvocationID.y < " << gridSize.y() << " ) \n"
159                 << "    if( gl_GlobalInvocationID.z < " << gridSize.z() << " ) \n"
160                 << "    {\n"
161                 << "            imageStore(u_image, " << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z") << ","
162                 << formatDataStr << "( int(gl_GlobalInvocationID.x) % 127, int(gl_GlobalInvocationID.y) % 127, int(gl_GlobalInvocationID.z) % 127, 1));\n"
163                 << "    }\n"
164                 << "}\n";
165
166         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
167 }
168
// Instance that executes the sparse-residency test: creates the sparse image,
// binds device memory for a subset of its blocks, fills it from a compute
// shader and reads it back for verification (see iterate()).
class ImageSparseResidencyInstance : public SparseResourcesBaseInstance
{
public:
					// Parameters mirror those of ImageSparseResidencyCase.
					ImageSparseResidencyInstance(Context&					context,
												 const ImageType			imageType,
												 const tcu::UVec3&			imageSize,
												 const tcu::TextureFormat&	format);

	// Runs the whole test once and returns pass/fail/not-supported.
	tcu::TestStatus	iterate						(void);

private:
	const ImageType				m_imageType;	// logical image type under test
	const tcu::UVec3			m_imageSize;	// full size incl. layer count encoding
	const tcu::TextureFormat	m_format;		// texel format under test
};
184
// Stores the test parameters; all device work is deferred to iterate().
ImageSparseResidencyInstance::ImageSparseResidencyInstance (Context&					context,
															const ImageType				imageType,
															const tcu::UVec3&			imageSize,
															const tcu::TextureFormat&	format)
	: SparseResourcesBaseInstance	(context)
	, m_imageType					(imageType)
	, m_imageSize					(imageSize)
	, m_format						(format)
{
}
195
196 tcu::TestStatus ImageSparseResidencyInstance::iterate (void)
197 {
198         const InstanceInterface&                        instance = m_context.getInstanceInterface();
199         const VkPhysicalDevice                          physicalDevice = m_context.getPhysicalDevice();
200         const VkPhysicalDeviceProperties        physicalDeviceProperties = getPhysicalDeviceProperties(instance, physicalDevice);
201         VkImageCreateInfo                                       imageCreateInfo;
202         VkSparseImageMemoryRequirements         aspectRequirements;
203         VkExtent3D                                                      imageGranularity;
204         std::vector<DeviceMemorySp>                     deviceMemUniquePtrVec;
205
206         // Check if image size does not exceed device limits
207         if (!isImageSizeSupported(instance, physicalDevice, m_imageType, m_imageSize))
208                 TCU_THROW(NotSupportedError, "Image size not supported for device");
209
210         // Check if device supports sparse operations for image type
211         if (!checkSparseSupportForImageType(instance, physicalDevice, m_imageType))
212                 TCU_THROW(NotSupportedError, "Sparse residency for image type is not supported");
213
214         imageCreateInfo.sType                                   = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
215         imageCreateInfo.pNext                                   = DE_NULL;
216         imageCreateInfo.flags                                   = VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
217         imageCreateInfo.imageType                               = mapImageType(m_imageType);
218         imageCreateInfo.format                                  = mapTextureFormat(m_format);
219         imageCreateInfo.extent                                  = makeExtent3D(getLayerSize(m_imageType, m_imageSize));
220         imageCreateInfo.mipLevels                               = 1u;
221         imageCreateInfo.arrayLayers                             = getNumLayers(m_imageType, m_imageSize);
222         imageCreateInfo.samples                                 = VK_SAMPLE_COUNT_1_BIT;
223         imageCreateInfo.tiling                                  = VK_IMAGE_TILING_OPTIMAL;
224         imageCreateInfo.initialLayout                   = VK_IMAGE_LAYOUT_UNDEFINED;
225         imageCreateInfo.usage                                   = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
226                                                                                           VK_IMAGE_USAGE_STORAGE_BIT;
227         imageCreateInfo.sharingMode                             = VK_SHARING_MODE_EXCLUSIVE;
228         imageCreateInfo.queueFamilyIndexCount   = 0u;
229         imageCreateInfo.pQueueFamilyIndices             = DE_NULL;
230
231         if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
232         {
233                 imageCreateInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
234         }
235
236         // Check if device supports sparse operations for image format
237         if (!checkSparseSupportForImageFormat(instance, physicalDevice, imageCreateInfo))
238                 TCU_THROW(NotSupportedError, "The image format does not support sparse operations");
239
240         {
241                 // Create logical device supporting both sparse and compute queues
242                 QueueRequirementsVec queueRequirements;
243                 queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
244                 queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
245
246                 createDeviceSupportingQueues(queueRequirements);
247         }
248
249         const DeviceInterface&  deviceInterface = getDeviceInterface();
250         const Queue&                    sparseQueue             = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
251         const Queue&                    computeQueue    = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
252
253         // Create sparse image
254         const Unique<VkImage> sparseImage(createImage(deviceInterface, getDevice(), &imageCreateInfo));
255
256         // Create sparse image memory bind semaphore
257         const Unique<VkSemaphore> imageMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));
258
259         {
260                 // Get image general memory requirements
261                 const VkMemoryRequirements imageMemoryRequirements = getImageMemoryRequirements(deviceInterface, getDevice(), *sparseImage);
262
263                 if (imageMemoryRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
264                         TCU_THROW(NotSupportedError, "Required memory size for sparse resource exceeds device limits");
265
266                 DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);
267
268                 // Get sparse image sparse memory requirements
269                 const std::vector<VkSparseImageMemoryRequirements> sparseMemoryRequirements = getImageSparseMemoryRequirements(deviceInterface, getDevice(), *sparseImage);
270
271                 DE_ASSERT(sparseMemoryRequirements.size() != 0);
272
273                 const deUint32 colorAspectIndex = getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_COLOR_BIT);
274
275                 if (colorAspectIndex == NO_MATCH_FOUND)
276                         TCU_THROW(NotSupportedError, "Not supported image aspect - the test supports currently only VK_IMAGE_ASPECT_COLOR_BIT");
277
278                 aspectRequirements      = sparseMemoryRequirements[colorAspectIndex];
279                 imageGranularity        = aspectRequirements.formatProperties.imageGranularity;
280
281                 const VkImageAspectFlags aspectMask = aspectRequirements.formatProperties.aspectMask;
282
283                 DE_ASSERT((aspectRequirements.imageMipTailSize % imageMemoryRequirements.alignment) == 0);
284
285                 std::vector<VkSparseImageMemoryBind> imageResidencyMemoryBinds;
286                 std::vector<VkSparseMemoryBind>          imageMipTailMemoryBinds;
287
288                 const deUint32                                           memoryType = findMatchingMemoryType(instance, physicalDevice, imageMemoryRequirements, MemoryRequirement::Any);
289
290                 if (memoryType == NO_MATCH_FOUND)
291                         return tcu::TestStatus::fail("No matching memory type found");
292
293                 // Bind device memory for each aspect
294                 for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
295                 {
296                         for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
297                         {
298                                 const VkImageSubresource subresource            = { aspectMask, mipLevelNdx, layerNdx };
299                                 const VkExtent3D                 mipExtent                      = mipLevelExtents(imageCreateInfo.extent, mipLevelNdx);
300                                 const tcu::UVec3                 numSparseBinds         = alignedDivide(mipExtent, imageGranularity);
301                                 const tcu::UVec3                 lastBlockExtent        = tcu::UVec3(mipExtent.width  % imageGranularity.width  ? mipExtent.width   % imageGranularity.width  : imageGranularity.width,
302                                                                                                                                                  mipExtent.height % imageGranularity.height ? mipExtent.height  % imageGranularity.height : imageGranularity.height,
303                                                                                                                                                  mipExtent.depth  % imageGranularity.depth  ? mipExtent.depth   % imageGranularity.depth  : imageGranularity.depth);
304                                 for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
305                                 for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
306                                 for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
307                                 {
308                                         const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();
309
310                                         if (linearIndex % 2u == 1u)
311                                         {
312                                                 continue;
313                                         }
314
315                                         VkOffset3D offset;
316                                         offset.x = x*imageGranularity.width;
317                                         offset.y = y*imageGranularity.height;
318                                         offset.z = z*imageGranularity.depth;
319
320                                         VkExtent3D extent;
321                                         extent.width  = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
322                                         extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
323                                         extent.depth  = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;
324
325                                         const VkSparseImageMemoryBind imageMemoryBind = makeSparseImageMemoryBind(deviceInterface, getDevice(),
326                                                 imageMemoryRequirements.alignment, memoryType, subresource, offset, extent);
327
328                                         deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
329
330                                         imageResidencyMemoryBinds.push_back(imageMemoryBind);
331                                 }
332                         }
333
334                         if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
335                         {
336                                 const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
337                                         aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);
338
339                                 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
340
341                                 imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
342                         }
343                 }
344
345                 if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
346                 {
347                         const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
348                                 aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);
349
350                         deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
351
352                         imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
353                 }
354
355                 VkBindSparseInfo bindSparseInfo =
356                 {
357                         VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                     //VkStructureType                                                       sType;
358                         DE_NULL,                                                                        //const void*                                                           pNext;
359                         0u,                                                                                     //deUint32                                                                      waitSemaphoreCount;
360                         DE_NULL,                                                                        //const VkSemaphore*                                            pWaitSemaphores;
361                         0u,                                                                                     //deUint32                                                                      bufferBindCount;
362                         DE_NULL,                                                                        //const VkSparseBufferMemoryBindInfo*           pBufferBinds;
363                         0u,                                                                                     //deUint32                                                                      imageOpaqueBindCount;
364                         DE_NULL,                                                                        //const VkSparseImageOpaqueMemoryBindInfo*      pImageOpaqueBinds;
365                         0u,                                                                                     //deUint32                                                                      imageBindCount;
366                         DE_NULL,                                                                        //const VkSparseImageMemoryBindInfo*            pImageBinds;
367                         1u,                                                                                     //deUint32                                                                      signalSemaphoreCount;
368                         &imageMemoryBindSemaphore.get()                         //const VkSemaphore*                                            pSignalSemaphores;
369                 };
370
371                 VkSparseImageMemoryBindInfo               imageResidencyBindInfo;
372                 VkSparseImageOpaqueMemoryBindInfo imageMipTailBindInfo;
373
374                 if (imageResidencyMemoryBinds.size() > 0)
375                 {
376                         imageResidencyBindInfo.image            = *sparseImage;
377                         imageResidencyBindInfo.bindCount        = static_cast<deUint32>(imageResidencyMemoryBinds.size());
378                         imageResidencyBindInfo.pBinds           = &imageResidencyMemoryBinds[0];
379
380                         bindSparseInfo.imageBindCount           = 1u;
381                         bindSparseInfo.pImageBinds                      = &imageResidencyBindInfo;
382                 }
383
384                 if (imageMipTailMemoryBinds.size() > 0)
385                 {
386                         imageMipTailBindInfo.image                      = *sparseImage;
387                         imageMipTailBindInfo.bindCount          = static_cast<deUint32>(imageMipTailMemoryBinds.size());
388                         imageMipTailBindInfo.pBinds                     = &imageMipTailMemoryBinds[0];
389
390                         bindSparseInfo.imageOpaqueBindCount = 1u;
391                         bindSparseInfo.pImageOpaqueBinds        = &imageMipTailBindInfo;
392                 }
393
394                 // Submit sparse bind commands for execution
395                 VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
396         }
397
398         // Create command buffer for compute and transfer operations
399         const Unique<VkCommandPool>       commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
400         const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
401
402         // Start recording commands
403         beginCommandBuffer(deviceInterface, *commandBuffer);
404
405         // Create descriptor set layout
406         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
407                 DescriptorSetLayoutBuilder()
408                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
409                 .build(deviceInterface, getDevice()));
410
411         // Create and bind compute pipeline
412         const Unique<VkShaderModule>    shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
413         const Unique<VkPipelineLayout>  pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
414         const Unique<VkPipeline>                computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
415
416         deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
417
418         // Create and bind descriptor set
419         const Unique<VkDescriptorPool> descriptorPool(
420                 DescriptorPoolBuilder()
421                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u)
422                 .build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
423
424         const Unique<VkDescriptorSet>   descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
425
426         const VkImageSubresourceRange   subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
427         const Unique<VkImageView>               imageView(makeImageView(deviceInterface, getDevice(), *sparseImage, mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange));
428         const VkDescriptorImageInfo             sparseImageInfo  = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
429
430         DescriptorSetUpdateBuilder()
431                 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &sparseImageInfo)
432                 .update(deviceInterface, getDevice());
433
434         deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
435
436         {
437                 const VkImageMemoryBarrier sparseImageLayoutChangeBarrier = makeImageMemoryBarrier
438                 (
439                         0u,
440                         VK_ACCESS_SHADER_WRITE_BIT,
441                         VK_IMAGE_LAYOUT_UNDEFINED,
442                         VK_IMAGE_LAYOUT_GENERAL,
443                         sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? sparseQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
444                         sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? computeQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
445                         *sparseImage,
446                         subresourceRange
447                 );
448
449                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageLayoutChangeBarrier);
450         }
451
452         const tcu::UVec3  gridSize = getShaderGridSize(m_imageType, m_imageSize);
453
454         {
455                 const tcu::UVec3  workGroupSize = computeWorkGroupSize(gridSize);
456
457                 const deUint32 xWorkGroupCount = gridSize.x() / workGroupSize.x() + (gridSize.x() % workGroupSize.x() ? 1u : 0u);
458                 const deUint32 yWorkGroupCount = gridSize.y() / workGroupSize.y() + (gridSize.y() % workGroupSize.y() ? 1u : 0u);
459                 const deUint32 zWorkGroupCount = gridSize.z() / workGroupSize.z() + (gridSize.z() % workGroupSize.z() ? 1u : 0u);
460
461                 const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);
462
463                 if (maxComputeWorkGroupCount.x() < xWorkGroupCount ||
464                         maxComputeWorkGroupCount.y() < yWorkGroupCount ||
465                         maxComputeWorkGroupCount.z() < zWorkGroupCount)
466                 {
467                         TCU_THROW(NotSupportedError, "Image size is not supported");
468                 }
469
470                 deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
471         }
472
473         {
474                 const VkImageMemoryBarrier sparseImageTrasferBarrier = makeImageMemoryBarrier
475                 (
476                         VK_ACCESS_SHADER_WRITE_BIT,
477                         VK_ACCESS_TRANSFER_READ_BIT,
478                         VK_IMAGE_LAYOUT_GENERAL,
479                         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
480                         *sparseImage,
481                         subresourceRange
482                 );
483
484                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageTrasferBarrier);
485         }
486
487         const deUint32                                  imageSizeInBytes                = getNumPixels(m_imageType, m_imageSize) * tcu::getPixelSize(m_format);
488         const VkBufferCreateInfo                outputBufferCreateInfo  = makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
489         const Unique<VkBuffer>                  outputBuffer                    (createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
490         const de::UniquePtr<Allocation> outputBufferAlloc               (bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
491
492         {
493                 const VkBufferImageCopy bufferImageCopy = makeBufferImageCopy(imageCreateInfo.extent, imageCreateInfo.arrayLayers);
494
495                 deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *sparseImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &bufferImageCopy);
496         }
497
498         {
499                 const VkBufferMemoryBarrier outputBufferHostReadBarrier = makeBufferMemoryBarrier
500                 (
501                         VK_ACCESS_TRANSFER_WRITE_BIT,
502                         VK_ACCESS_HOST_READ_BIT,
503                         *outputBuffer,
504                         0u,
505                         imageSizeInBytes
506                 );
507
508                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferHostReadBarrier, 0u, DE_NULL);
509         }
510
511         // End recording commands
512         endCommandBuffer(deviceInterface, *commandBuffer);
513
514         // The stage at which execution is going to wait for finish of sparse binding operations
515         const VkPipelineStageFlags stageBits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
516
517         // Submit commands for execution and wait for completion
518         submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &imageMemoryBindSemaphore.get(), stageBits);
519
520         // Retrieve data from buffer to host memory
521         invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), imageSizeInBytes);
522
523         const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
524         const tcu::ConstPixelBufferAccess pixelBuffer = tcu::ConstPixelBufferAccess(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputData);
525
526         // Wait for sparse queue to become idle
527         deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
528
529         // Validate results
530         if( aspectRequirements.imageMipTailFirstLod > 0u )
531         {
532                 const VkExtent3D                 mipExtent               = mipLevelExtents(imageCreateInfo.extent, 0u);
533                 const tcu::UVec3                 numSparseBinds  = alignedDivide(mipExtent, imageGranularity);
534                 const tcu::UVec3                 lastBlockExtent = tcu::UVec3(  mipExtent.width  % imageGranularity.width  ? mipExtent.width  % imageGranularity.width  : imageGranularity.width,
535                                                                                                                                 mipExtent.height % imageGranularity.height ? mipExtent.height % imageGranularity.height : imageGranularity.height,
536                                                                                                                                 mipExtent.depth  % imageGranularity.depth  ? mipExtent.depth  % imageGranularity.depth  : imageGranularity.depth);
537
538                 for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
539                 {
540                         for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
541                         for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
542                         for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
543                         {
544                                 VkExtent3D offset;
545                                 offset.width  = x*imageGranularity.width;
546                                 offset.height = y*imageGranularity.height;
547                                 offset.depth  = z*imageGranularity.depth + layerNdx*numSparseBinds.z()*imageGranularity.depth;
548
549                                 VkExtent3D extent;
550                                 extent.width  = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
551                                 extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
552                                 extent.depth  = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;
553
554                                 const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();
555
556                                 if (linearIndex % 2u == 0u)
557                                 {
558                                         for (deUint32 offsetZ = offset.depth;  offsetZ < offset.depth  + extent.depth;  ++offsetZ)
559                                         for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
560                                         for (deUint32 offsetX = offset.width;  offsetX < offset.width  + extent.width;  ++offsetX)
561                                         {
562                                                 const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
563                                                 const tcu::UVec4 outputValue    = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
564
565                                                 if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
566                                                         return tcu::TestStatus::fail("Failed");
567                                         }
568                                 }
569                                 else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
570                                 {
571                                         for (deUint32 offsetZ = offset.depth;  offsetZ < offset.depth  + extent.depth;  ++offsetZ)
572                                         for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
573                                         for (deUint32 offsetX = offset.width;  offsetX < offset.width  + extent.width;  ++offsetX)
574                                         {
575                                                 const tcu::UVec4 referenceValue = tcu::UVec4(0u, 0u, 0u, 0u);
576                                                 const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
577
578                                                 if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
579                                                         return tcu::TestStatus::fail("Failed");
580                                         }
581                                 }
582                         }
583                 }
584         }
585         else
586         {
587                 const VkExtent3D mipExtent = mipLevelExtents(imageCreateInfo.extent, 0u);
588
589                 for (deUint32 offsetZ = 0u; offsetZ < mipExtent.depth * imageCreateInfo.arrayLayers; ++offsetZ)
590                 for (deUint32 offsetY = 0u; offsetY < mipExtent.height; ++offsetY)
591                 for (deUint32 offsetX = 0u; offsetX < mipExtent.width;  ++offsetX)
592                 {
593                         const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
594                         const tcu::UVec4 outputValue    = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
595
596                         if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
597                                 return tcu::TestStatus::fail("Failed");
598                 }
599         }
600
601         return tcu::TestStatus::pass("Passed");
602 }
603
604 TestInstance* ImageSparseResidencyCase::createInstance (Context& context) const
605 {
606         return new ImageSparseResidencyInstance(context, m_imageType, m_imageSize, m_format);
607 }
608
609 } // anonymous ns
610
611 tcu::TestCaseGroup* createImageSparseResidencyTests (tcu::TestContext& testCtx)
612 {
613         de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_residency", "Buffer Sparse Residency"));
614
615         static const deUint32 sizeCountPerImageType = 3u;
616
617         struct ImageParameters
618         {
619                 ImageType       imageType;
620                 tcu::UVec3      imageSizes[sizeCountPerImageType];
621         };
622
623         static const ImageParameters imageParametersArray[] =
624         {
625                 { IMAGE_TYPE_2D,                 { tcu::UVec3(512u, 256u, 1u),  tcu::UVec3(1024u, 128u, 1u), tcu::UVec3(11u,  137u, 1u) } },
626                 { IMAGE_TYPE_2D_ARRAY,   { tcu::UVec3(512u, 256u, 6u),  tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u,  137u, 3u) } },
627                 { IMAGE_TYPE_CUBE,               { tcu::UVec3(256u, 256u, 1u),  tcu::UVec3(128u,  128u, 1u), tcu::UVec3(137u, 137u, 1u) } },
628                 { IMAGE_TYPE_CUBE_ARRAY, { tcu::UVec3(256u, 256u, 6u),  tcu::UVec3(128u,  128u, 8u), tcu::UVec3(137u, 137u, 3u) } },
629                 { IMAGE_TYPE_3D,                 { tcu::UVec3(512u, 256u, 16u), tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u,  137u, 3u) } }
630         };
631
632         static const tcu::TextureFormat formats[] =
633         {
634                 tcu::TextureFormat(tcu::TextureFormat::R,        tcu::TextureFormat::SIGNED_INT32),
635                 tcu::TextureFormat(tcu::TextureFormat::R,        tcu::TextureFormat::SIGNED_INT16),
636                 tcu::TextureFormat(tcu::TextureFormat::R,        tcu::TextureFormat::SIGNED_INT8),
637                 tcu::TextureFormat(tcu::TextureFormat::RG,       tcu::TextureFormat::SIGNED_INT32),
638                 tcu::TextureFormat(tcu::TextureFormat::RG,   tcu::TextureFormat::SIGNED_INT16),
639                 tcu::TextureFormat(tcu::TextureFormat::RG,   tcu::TextureFormat::SIGNED_INT8),
640                 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT32),
641                 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT16),
642                 tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT8)
643         };
644
645         for (deInt32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray); ++imageTypeNdx)
646         {
647                 const ImageType                                 imageType = imageParametersArray[imageTypeNdx].imageType;
648                 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
649
650                 for (deInt32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); ++formatNdx)
651                 {
652                         const tcu::TextureFormat&               format = formats[formatNdx];
653                         de::MovePtr<tcu::TestCaseGroup> formatGroup(new tcu::TestCaseGroup(testCtx, getShaderImageFormatQualifier(format).c_str(), ""));
654
655                         for (deInt32 imageSizeNdx = 0; imageSizeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray[imageTypeNdx].imageSizes); ++imageSizeNdx)
656                         {
657                                 const tcu::UVec3 imageSize = imageParametersArray[imageTypeNdx].imageSizes[imageSizeNdx];
658
659                                 std::ostringstream stream;
660                                 stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();
661
662                                 formatGroup->addChild(new ImageSparseResidencyCase(testCtx, stream.str(), "", imageType, imageSize, format, glu::GLSL_VERSION_440));
663                         }
664                         imageTypeGroup->addChild(formatGroup.release());
665                 }
666                 testGroup->addChild(imageTypeGroup.release());
667         }
668
669         return testGroup.release();
670 }
671
672 } // sparse
673 } // vkt