Fix missing dependency on sparse binds
platform/upstream/VK-GL-CTS.git: external/vulkancts/modules/vulkan/sparse_resources/vktSparseResourcesBufferTests.cpp
/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Sparse buffer tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesBufferTests.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktSparseResourcesBufferSparseBinding.hpp"
#include "vktSparseResourcesBufferSparseResidency.hpp"
#include "vktSparseResourcesBufferMemoryAliasing.hpp"

#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"

#include "tcuTestLog.hpp"

#include "deUniquePtr.hpp"
#include "deSharedPtr.hpp"
#include "deMath.h"

#include <string>
#include <vector>
#include <map>

using namespace vk;
using de::MovePtr;
using de::UniquePtr;
using de::SharedPtr;
using tcu::Vec4;
using tcu::IVec2;
using tcu::IVec4;

namespace vkt
{
namespace sparse
{
namespace
{

typedef SharedPtr<UniquePtr<Allocation> > AllocationSp;

enum
{
    RENDER_SIZE = 128,              //!< framebuffer size in pixels
    GRID_SIZE   = RENDER_SIZE / 8,  //!< number of grid tiles in a row
};

enum TestFlagBits
{
                                                 //   sparseBinding is implied
    TEST_FLAG_ALIASED              = 1u << 0,    //!< sparseResidencyAliased
    TEST_FLAG_RESIDENCY            = 1u << 1,    //!< sparseResidencyBuffer
    TEST_FLAG_NON_RESIDENT_STRICT  = 1u << 2,    //!< residencyNonResidentStrict
    TEST_FLAG_ENABLE_DEVICE_GROUPS = 1u << 3,    //!< device groups are enabled
};
typedef deUint32 TestFlags;

//! SparseAllocationBuilder output. Owns the allocated memory.
struct SparseAllocation
{
    deUint32                        numResourceChunks;
    VkDeviceSize                    resourceSize;       //!< buffer size in bytes
    std::vector<AllocationSp>       allocations;        //!< actual allocated memory
    std::vector<VkSparseMemoryBind> memoryBinds;        //!< memory binds backing the resource
    deUint32                        memoryType;         //!< memory type (same for all allocations)
    deUint32                        heapIndex;          //!< memory heap index
};

//! Utility to lay out memory allocations for a sparse buffer, including holes and aliased regions.
//! Will allocate memory upon building.
class SparseAllocationBuilder
{
public:
                                SparseAllocationBuilder (void);

    // \note "chunk" is the smallest (due to alignment) bindable amount of memory

    SparseAllocationBuilder&    addMemoryHole        (const deUint32 numChunks = 1u);
    SparseAllocationBuilder&    addResourceHole      (const deUint32 numChunks = 1u);
    SparseAllocationBuilder&    addMemoryBind        (const deUint32 numChunks = 1u);
    SparseAllocationBuilder&    addAliasedMemoryBind (const deUint32 allocationNdx, const deUint32 chunkOffset, const deUint32 numChunks = 1u);
    SparseAllocationBuilder&    addMemoryAllocation  (void);

    MovePtr<SparseAllocation>   build                (const InstanceInterface&  instanceInterface,
                                                      const VkPhysicalDevice    physicalDevice,
                                                      const DeviceInterface&    vk,
                                                      const VkDevice            device,
                                                      Allocator&                allocator,
                                                      VkBufferCreateInfo        referenceCreateInfo,        //!< buffer size is ignored in this info
                                                      const VkDeviceSize        minChunkSize = 0ull) const; //!< make sure chunks are at least this big

private:
    struct MemoryBind
    {
        deUint32    allocationNdx;
        deUint32    resourceChunkNdx;
        deUint32    memoryChunkNdx;
        deUint32    numChunks;
    };

    deUint32                m_allocationNdx;
    deUint32                m_resourceChunkNdx;
    deUint32                m_memoryChunkNdx;
    std::vector<MemoryBind> m_memoryBinds;
    std::vector<deUint32>   m_chunksPerAllocation;
};

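// Illustrative usage sketch (not part of the original test source): how the builder's
// chained calls are typically combined to describe a sparse layout and then turned into
// bindable memory. The variables vki, physDevice, vk, device, allocator and refCreateInfo
// are placeholders for objects the caller already owns.
//
//   MovePtr<SparseAllocation> alloc = SparseAllocationBuilder()
//       .addMemoryBind(2u)              // chunks 0..1 backed by allocation #0
//       .addResourceHole()              // chunk 2 left unbound (residency case)
//       .addMemoryAllocation()          // start allocation #1
//       .addMemoryBind()                // chunk 3 backed by allocation #1
//       .addAliasedMemoryBind(0u, 0u)   // chunk 4 aliases chunk 0 of allocation #0
//       .build(vki, physDevice, vk, device, allocator, refCreateInfo);
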
SparseAllocationBuilder::SparseAllocationBuilder (void)
    : m_allocationNdx    (0)
    , m_resourceChunkNdx (0)
    , m_memoryChunkNdx   (0)
{
    m_chunksPerAllocation.push_back(0);
}

SparseAllocationBuilder& SparseAllocationBuilder::addMemoryHole (const deUint32 numChunks)
{
    m_memoryChunkNdx                       += numChunks;
    m_chunksPerAllocation[m_allocationNdx] += numChunks;

    return *this;
}

SparseAllocationBuilder& SparseAllocationBuilder::addResourceHole (const deUint32 numChunks)
{
    m_resourceChunkNdx += numChunks;

    return *this;
}

SparseAllocationBuilder& SparseAllocationBuilder::addMemoryAllocation (void)
{
    DE_ASSERT(m_memoryChunkNdx != 0);   // doesn't make sense to have an empty allocation

    m_allocationNdx  += 1;
    m_memoryChunkNdx  = 0;
    m_chunksPerAllocation.push_back(0);

    return *this;
}

SparseAllocationBuilder& SparseAllocationBuilder::addMemoryBind (const deUint32 numChunks)
{
    const MemoryBind memoryBind =
    {
        m_allocationNdx,
        m_resourceChunkNdx,
        m_memoryChunkNdx,
        numChunks
    };
    m_memoryBinds.push_back(memoryBind);

    m_resourceChunkNdx                     += numChunks;
    m_memoryChunkNdx                       += numChunks;
    m_chunksPerAllocation[m_allocationNdx] += numChunks;

    return *this;
}

SparseAllocationBuilder& SparseAllocationBuilder::addAliasedMemoryBind (const deUint32 allocationNdx, const deUint32 chunkOffset, const deUint32 numChunks)
{
    DE_ASSERT(allocationNdx <= m_allocationNdx);

    const MemoryBind memoryBind =
    {
        allocationNdx,
        m_resourceChunkNdx,
        chunkOffset,
        numChunks
    };
    m_memoryBinds.push_back(memoryBind);

    m_resourceChunkNdx += numChunks;

    return *this;
}

MovePtr<SparseAllocation> SparseAllocationBuilder::build (const InstanceInterface& instanceInterface,
                                                          const VkPhysicalDevice   physicalDevice,
                                                          const DeviceInterface&   vk,
                                                          const VkDevice           device,
                                                          Allocator&               allocator,
                                                          VkBufferCreateInfo       referenceCreateInfo,
                                                          const VkDeviceSize       minChunkSize) const
{
    MovePtr<SparseAllocation>  sparseAllocation (new SparseAllocation());

    referenceCreateInfo.size = sizeof(deUint32);
    const Unique<VkBuffer>     refBuffer          (createBuffer(vk, device, &referenceCreateInfo));
    const VkMemoryRequirements memoryRequirements = getBufferMemoryRequirements(vk, device, *refBuffer);
    const VkDeviceSize         chunkSize          = std::max(memoryRequirements.alignment, static_cast<VkDeviceSize>(deAlign64(minChunkSize, memoryRequirements.alignment)));
    const deUint32             memoryTypeNdx      = findMatchingMemoryType(instanceInterface, physicalDevice, memoryRequirements, MemoryRequirement::Any);
    VkMemoryAllocateInfo       allocInfo          =
    {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,  // VkStructureType    sType;
        DE_NULL,                                 // const void*        pNext;
        memoryRequirements.size,                 // VkDeviceSize       allocationSize;
        memoryTypeNdx,                           // deUint32           memoryTypeIndex;
    };

    for (std::vector<deUint32>::const_iterator numChunksIter = m_chunksPerAllocation.begin(); numChunksIter != m_chunksPerAllocation.end(); ++numChunksIter)
    {
        allocInfo.allocationSize = *numChunksIter * chunkSize;
        sparseAllocation->allocations.push_back(makeDeSharedPtr(allocator.allocate(allocInfo, (VkDeviceSize)0)));
    }

    for (std::vector<MemoryBind>::const_iterator memBindIter = m_memoryBinds.begin(); memBindIter != m_memoryBinds.end(); ++memBindIter)
    {
        const Allocation&        alloc = **sparseAllocation->allocations[memBindIter->allocationNdx];
        const VkSparseMemoryBind bind  =
        {
            memBindIter->resourceChunkNdx * chunkSize,                      // VkDeviceSize               resourceOffset;
            memBindIter->numChunks * chunkSize,                             // VkDeviceSize               size;
            alloc.getMemory(),                                              // VkDeviceMemory             memory;
            alloc.getOffset() + memBindIter->memoryChunkNdx * chunkSize,    // VkDeviceSize               memoryOffset;
            (VkSparseMemoryBindFlags)0,                                     // VkSparseMemoryBindFlags    flags;
        };
        sparseAllocation->memoryBinds.push_back(bind);
        referenceCreateInfo.size = std::max(referenceCreateInfo.size, bind.resourceOffset + bind.size);
    }

    sparseAllocation->resourceSize      = referenceCreateInfo.size;
    sparseAllocation->numResourceChunks = m_resourceChunkNdx;
    sparseAllocation->memoryType        = memoryTypeNdx;
    sparseAllocation->heapIndex         = getHeapIndexForMemoryType(instanceInterface, physicalDevice, memoryTypeNdx);

    return sparseAllocation;
}

VkImageCreateInfo makeImageCreateInfo (const VkFormat format, const IVec2& size, const VkImageUsageFlags usage)
{
    const VkImageCreateInfo imageParams =
    {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,    // VkStructureType          sType;
        DE_NULL,                                // const void*              pNext;
        (VkImageCreateFlags)0,                  // VkImageCreateFlags       flags;
        VK_IMAGE_TYPE_2D,                       // VkImageType              imageType;
        format,                                 // VkFormat                 format;
        makeExtent3D(size.x(), size.y(), 1),    // VkExtent3D               extent;
        1u,                                     // deUint32                 mipLevels;
        1u,                                     // deUint32                 arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,                  // VkSampleCountFlagBits    samples;
        VK_IMAGE_TILING_OPTIMAL,                // VkImageTiling            tiling;
        usage,                                  // VkImageUsageFlags        usage;
        VK_SHARING_MODE_EXCLUSIVE,              // VkSharingMode            sharingMode;
        0u,                                     // deUint32                 queueFamilyIndexCount;
        DE_NULL,                                // const deUint32*          pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED,              // VkImageLayout            initialLayout;
    };
    return imageParams;
}

Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface&                    vk,
                                       const VkDevice                            device,
                                       const VkPipelineLayout                    pipelineLayout,
                                       const VkRenderPass                        renderPass,
                                       const IVec2                               renderSize,
                                       const VkPrimitiveTopology                 topology,
                                       const deUint32                            stageCount,
                                       const VkPipelineShaderStageCreateInfo*    pStages)
{
    const VkVertexInputBindingDescription vertexInputBindingDescription =
    {
        0u,                             // uint32_t             binding;
        sizeof(Vec4),                   // uint32_t             stride;
        VK_VERTEX_INPUT_RATE_VERTEX,    // VkVertexInputRate    inputRate;
    };

    const VkVertexInputAttributeDescription vertexInputAttributeDescription =
    {
        0u,                               // uint32_t    location;
        0u,                               // uint32_t    binding;
        VK_FORMAT_R32G32B32A32_SFLOAT,    // VkFormat    format;
        0u,                               // uint32_t    offset;
    };

    const VkPipelineVertexInputStateCreateInfo vertexInputStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,    // VkStructureType                             sType;
        DE_NULL,                                                      // const void*                                 pNext;
        (VkPipelineVertexInputStateCreateFlags)0,                     // VkPipelineVertexInputStateCreateFlags       flags;
        1u,                                                           // uint32_t                                    vertexBindingDescriptionCount;
        &vertexInputBindingDescription,                               // const VkVertexInputBindingDescription*      pVertexBindingDescriptions;
        1u,                                                           // uint32_t                                    vertexAttributeDescriptionCount;
        &vertexInputAttributeDescription,                             // const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions;
    };

    const VkPipelineInputAssemblyStateCreateInfo pipelineInputAssemblyStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,    // VkStructureType                            sType;
        DE_NULL,                                                        // const void*                                pNext;
        (VkPipelineInputAssemblyStateCreateFlags)0,                     // VkPipelineInputAssemblyStateCreateFlags    flags;
        topology,                                                       // VkPrimitiveTopology                        topology;
        VK_FALSE,                                                       // VkBool32                                   primitiveRestartEnable;
    };

    const VkViewport viewport = makeViewport(renderSize);
    const VkRect2D   scissor  = makeRect2D(renderSize);

    const VkPipelineViewportStateCreateInfo pipelineViewportStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,    // VkStructureType                       sType;
        DE_NULL,                                                  // const void*                           pNext;
        (VkPipelineViewportStateCreateFlags)0,                    // VkPipelineViewportStateCreateFlags    flags;
        1u,                                                       // uint32_t                              viewportCount;
        &viewport,                                                // const VkViewport*                     pViewports;
        1u,                                                       // uint32_t                              scissorCount;
        &scissor,                                                 // const VkRect2D*                       pScissors;
    };

    const VkPipelineRasterizationStateCreateInfo pipelineRasterizationStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,    // VkStructureType                            sType;
        DE_NULL,                                                       // const void*                                pNext;
        (VkPipelineRasterizationStateCreateFlags)0,                    // VkPipelineRasterizationStateCreateFlags    flags;
        VK_FALSE,                                                      // VkBool32                                   depthClampEnable;
        VK_FALSE,                                                      // VkBool32                                   rasterizerDiscardEnable;
        VK_POLYGON_MODE_FILL,                                          // VkPolygonMode                              polygonMode;
        VK_CULL_MODE_NONE,                                             // VkCullModeFlags                            cullMode;
        VK_FRONT_FACE_COUNTER_CLOCKWISE,                               // VkFrontFace                                frontFace;
        VK_FALSE,                                                      // VkBool32                                   depthBiasEnable;
        0.0f,                                                          // float                                      depthBiasConstantFactor;
        0.0f,                                                          // float                                      depthBiasClamp;
        0.0f,                                                          // float                                      depthBiasSlopeFactor;
        1.0f,                                                          // float                                      lineWidth;
    };

    const VkPipelineMultisampleStateCreateInfo pipelineMultisampleStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,    // VkStructureType                          sType;
        DE_NULL,                                                     // const void*                              pNext;
        (VkPipelineMultisampleStateCreateFlags)0,                    // VkPipelineMultisampleStateCreateFlags    flags;
        VK_SAMPLE_COUNT_1_BIT,                                       // VkSampleCountFlagBits                    rasterizationSamples;
        VK_FALSE,                                                    // VkBool32                                 sampleShadingEnable;
        0.0f,                                                        // float                                    minSampleShading;
        DE_NULL,                                                     // const VkSampleMask*                      pSampleMask;
        VK_FALSE,                                                    // VkBool32                                 alphaToCoverageEnable;
        VK_FALSE                                                     // VkBool32                                 alphaToOneEnable;
    };

    const VkStencilOpState stencilOpState = makeStencilOpState(
        VK_STENCIL_OP_KEEP,      // stencil fail
        VK_STENCIL_OP_KEEP,      // depth & stencil pass
        VK_STENCIL_OP_KEEP,      // depth only fail
        VK_COMPARE_OP_ALWAYS,    // compare op
        0u,                      // compare mask
        0u,                      // write mask
        0u);                     // reference

    VkPipelineDepthStencilStateCreateInfo pipelineDepthStencilStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,    // VkStructureType                           sType;
        DE_NULL,                                                       // const void*                               pNext;
        (VkPipelineDepthStencilStateCreateFlags)0,                     // VkPipelineDepthStencilStateCreateFlags    flags;
        VK_FALSE,                                                      // VkBool32                                  depthTestEnable;
        VK_FALSE,                                                      // VkBool32                                  depthWriteEnable;
        VK_COMPARE_OP_LESS,                                            // VkCompareOp                               depthCompareOp;
        VK_FALSE,                                                      // VkBool32                                  depthBoundsTestEnable;
        VK_FALSE,                                                      // VkBool32                                  stencilTestEnable;
        stencilOpState,                                                // VkStencilOpState                          front;
        stencilOpState,                                                // VkStencilOpState                          back;
        0.0f,                                                          // float                                     minDepthBounds;
        1.0f,                                                          // float                                     maxDepthBounds;
    };

    const VkColorComponentFlags               colorComponentsAll                = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
    const VkPipelineColorBlendAttachmentState pipelineColorBlendAttachmentState =
    {
        VK_FALSE,                // VkBool32                 blendEnable;
        VK_BLEND_FACTOR_ONE,     // VkBlendFactor            srcColorBlendFactor;
        VK_BLEND_FACTOR_ZERO,    // VkBlendFactor            dstColorBlendFactor;
        VK_BLEND_OP_ADD,         // VkBlendOp                colorBlendOp;
        VK_BLEND_FACTOR_ONE,     // VkBlendFactor            srcAlphaBlendFactor;
        VK_BLEND_FACTOR_ZERO,    // VkBlendFactor            dstAlphaBlendFactor;
        VK_BLEND_OP_ADD,         // VkBlendOp                alphaBlendOp;
        colorComponentsAll,      // VkColorComponentFlags    colorWriteMask;
    };

    const VkPipelineColorBlendStateCreateInfo pipelineColorBlendStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,    // VkStructureType                               sType;
        DE_NULL,                                                     // const void*                                   pNext;
        (VkPipelineColorBlendStateCreateFlags)0,                     // VkPipelineColorBlendStateCreateFlags          flags;
        VK_FALSE,                                                    // VkBool32                                      logicOpEnable;
        VK_LOGIC_OP_COPY,                                            // VkLogicOp                                     logicOp;
        1u,                                                          // deUint32                                      attachmentCount;
        &pipelineColorBlendAttachmentState,                          // const VkPipelineColorBlendAttachmentState*    pAttachments;
        { 0.0f, 0.0f, 0.0f, 0.0f },                                  // float                                         blendConstants[4];
    };

    const VkGraphicsPipelineCreateInfo graphicsPipelineInfo =
    {
        VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,    // VkStructureType                                  sType;
        DE_NULL,                                            // const void*                                      pNext;
        (VkPipelineCreateFlags)0,                           // VkPipelineCreateFlags                            flags;
        stageCount,                                         // deUint32                                         stageCount;
        pStages,                                            // const VkPipelineShaderStageCreateInfo*           pStages;
        &vertexInputStateInfo,                              // const VkPipelineVertexInputStateCreateInfo*      pVertexInputState;
        &pipelineInputAssemblyStateInfo,                    // const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState;
        DE_NULL,                                            // const VkPipelineTessellationStateCreateInfo*     pTessellationState;
        &pipelineViewportStateInfo,                         // const VkPipelineViewportStateCreateInfo*         pViewportState;
        &pipelineRasterizationStateInfo,                    // const VkPipelineRasterizationStateCreateInfo*    pRasterizationState;
        &pipelineMultisampleStateInfo,                      // const VkPipelineMultisampleStateCreateInfo*      pMultisampleState;
        &pipelineDepthStencilStateInfo,                     // const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState;
        &pipelineColorBlendStateInfo,                       // const VkPipelineColorBlendStateCreateInfo*       pColorBlendState;
        DE_NULL,                                            // const VkPipelineDynamicStateCreateInfo*          pDynamicState;
        pipelineLayout,                                     // VkPipelineLayout                                 layout;
        renderPass,                                         // VkRenderPass                                     renderPass;
        0u,                                                 // deUint32                                         subpass;
        DE_NULL,                                            // VkPipeline                                       basePipelineHandle;
        0,                                                  // deInt32                                          basePipelineIndex;
    };

    return createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineInfo);
}

//! Return true if there are any red (or all zero) pixels in the image
bool imageHasErrorPixels (const tcu::ConstPixelBufferAccess image)
{
    const Vec4 errorColor = Vec4(1.0f, 0.0f, 0.0f, 1.0f);
    const Vec4 blankColor = Vec4();

    for (int y = 0; y < image.getHeight(); ++y)
    for (int x = 0; x < image.getWidth(); ++x)
    {
        const Vec4 color = image.getPixel(x, y);
        if (color == errorColor || color == blankColor)
            return true;
    }

    return false;
}

class Renderer
{
public:
    typedef std::map<VkShaderStageFlagBits, const VkSpecializationInfo*> SpecializationMap;

    //! Use the delegate to bind descriptor sets, vertex buffers, etc. and make a draw call
    struct Delegate
    {
        virtual         ~Delegate     (void) {}
        virtual void    rendererDraw  (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const = 0;
    };

    Renderer (const DeviceInterface&      vk,
              const VkDevice              device,
              Allocator&                  allocator,
              const deUint32              queueFamilyIndex,
              const VkDescriptorSetLayout descriptorSetLayout,    //!< may be NULL, if no descriptors are used
              BinaryCollection&           binaryCollection,
              const std::string&          vertexName,
              const std::string&          fragmentName,
              const VkBuffer              colorBuffer,
              const IVec2&                renderSize,
              const VkFormat              colorFormat,
              const Vec4&                 clearColor,
              const VkPrimitiveTopology   topology,
              SpecializationMap           specMap = SpecializationMap())
        : m_colorBuffer           (colorBuffer)
        , m_renderSize            (renderSize)
        , m_colorFormat           (colorFormat)
        , m_colorSubresourceRange (makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u))
        , m_clearColor            (clearColor)
        , m_topology              (topology)
        , m_descriptorSetLayout   (descriptorSetLayout)
    {
        m_colorImage      = makeImage     (vk, device, makeImageCreateInfo(m_colorFormat, m_renderSize, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT));
        m_colorImageAlloc = bindImage     (vk, device, allocator, *m_colorImage, MemoryRequirement::Any);
        m_colorAttachment = makeImageView (vk, device, *m_colorImage, VK_IMAGE_VIEW_TYPE_2D, m_colorFormat, m_colorSubresourceRange);

        m_vertexModule    = createShaderModule (vk, device, binaryCollection.get(vertexName), 0u);
        m_fragmentModule  = createShaderModule (vk, device, binaryCollection.get(fragmentName), 0u);

        const VkPipelineShaderStageCreateInfo pShaderStages[] =
        {
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    // VkStructureType                     sType;
                DE_NULL,                                                // const void*                         pNext;
                (VkPipelineShaderStageCreateFlags)0,                    // VkPipelineShaderStageCreateFlags    flags;
                VK_SHADER_STAGE_VERTEX_BIT,                             // VkShaderStageFlagBits               stage;
                *m_vertexModule,                                        // VkShaderModule                      module;
                "main",                                                 // const char*                         pName;
                specMap[VK_SHADER_STAGE_VERTEX_BIT],                    // const VkSpecializationInfo*         pSpecializationInfo;
            },
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    // VkStructureType                     sType;
                DE_NULL,                                                // const void*                         pNext;
                (VkPipelineShaderStageCreateFlags)0,                    // VkPipelineShaderStageCreateFlags    flags;
                VK_SHADER_STAGE_FRAGMENT_BIT,                           // VkShaderStageFlagBits               stage;
                *m_fragmentModule,                                      // VkShaderModule                      module;
                "main",                                                 // const char*                         pName;
                specMap[VK_SHADER_STAGE_FRAGMENT_BIT],                  // const VkSpecializationInfo*         pSpecializationInfo;
            }
        };

        m_renderPass     = makeRenderPass        (vk, device, m_colorFormat);
        m_framebuffer    = makeFramebuffer       (vk, device, *m_renderPass, m_colorAttachment.get(),
                                                  static_cast<deUint32>(m_renderSize.x()), static_cast<deUint32>(m_renderSize.y()));
        m_pipelineLayout = makePipelineLayout    (vk, device, m_descriptorSetLayout);
        m_pipeline       = makeGraphicsPipeline  (vk, device, *m_pipelineLayout, *m_renderPass, m_renderSize, m_topology, DE_LENGTH_OF_ARRAY(pShaderStages), pShaderStages);
        m_cmdPool        = makeCommandPool       (vk, device, queueFamilyIndex);
        m_cmdBuffer      = allocateCommandBuffer (vk, device, *m_cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    }

    void draw (const DeviceInterface& vk,
               const VkDevice         device,
               const VkQueue          queue,
               const Delegate&        drawDelegate,
               const bool             useDeviceGroups,
               const deUint32         deviceID) const
    {
        beginCommandBuffer(vk, *m_cmdBuffer);

        beginRenderPass(vk, *m_cmdBuffer, *m_renderPass, *m_framebuffer, makeRect2D(0, 0, m_renderSize.x(), m_renderSize.y()), m_clearColor);

        vk.cmdBindPipeline(*m_cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
        drawDelegate.rendererDraw(*m_pipelineLayout, *m_cmdBuffer);

        endRenderPass(vk, *m_cmdBuffer);

        copyImageToBuffer(vk, *m_cmdBuffer, *m_colorImage, m_colorBuffer, m_renderSize);

        endCommandBuffer(vk, *m_cmdBuffer);
        submitCommandsAndWait(vk, device, queue, *m_cmdBuffer, 0U, DE_NULL, DE_NULL, 0U, DE_NULL, useDeviceGroups, deviceID);
    }

private:
    const VkBuffer                  m_colorBuffer;
    const IVec2                     m_renderSize;
    const VkFormat                  m_colorFormat;
    const VkImageSubresourceRange   m_colorSubresourceRange;
    const Vec4                      m_clearColor;
    const VkPrimitiveTopology       m_topology;
    const VkDescriptorSetLayout     m_descriptorSetLayout;

    Move<VkImage>                   m_colorImage;
    MovePtr<Allocation>             m_colorImageAlloc;
    Move<VkImageView>               m_colorAttachment;
    Move<VkShaderModule>            m_vertexModule;
    Move<VkShaderModule>            m_fragmentModule;
    Move<VkRenderPass>              m_renderPass;
    Move<VkFramebuffer>             m_framebuffer;
    Move<VkPipelineLayout>          m_pipelineLayout;
    Move<VkPipeline>                m_pipeline;
    Move<VkCommandPool>             m_cmdPool;
    Move<VkCommandBuffer>           m_cmdBuffer;

    // "deleted"
                Renderer    (const Renderer&);
    Renderer&   operator=   (const Renderer&);
};

void bindSparseBuffer (const DeviceInterface& vk, const VkDevice device, const VkQueue sparseQueue, const VkBuffer buffer, const SparseAllocation& sparseAllocation,
                       const bool useDeviceGroups, deUint32 resourceDevId, deUint32 memoryDeviceId)
{
    const VkSparseBufferMemoryBindInfo sparseBufferMemoryBindInfo =
    {
        buffer,                                                        // VkBuffer                     buffer;
        static_cast<deUint32>(sparseAllocation.memoryBinds.size()),    // uint32_t                     bindCount;
        &sparseAllocation.memoryBinds[0],                              // const VkSparseMemoryBind*    pBinds;
    };

    const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
    {
        VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO,    // VkStructureType    sType;
        DE_NULL,                                            // const void*        pNext;
        resourceDevId,                                      // deUint32           resourceDeviceIndex;
        memoryDeviceId,                                     // deUint32           memoryDeviceIndex;
    };

    const VkBindSparseInfo bindInfo =
    {
        VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                     // VkStructureType                             sType;
        useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL,    // const void*                                 pNext;
        0u,                                                     // uint32_t                                    waitSemaphoreCount;
        DE_NULL,                                                // const VkSemaphore*                          pWaitSemaphores;
        1u,                                                     // uint32_t                                    bufferBindCount;
        &sparseBufferMemoryBindInfo,                            // const VkSparseBufferMemoryBindInfo*         pBufferBinds;
        0u,                                                     // uint32_t                                    imageOpaqueBindCount;
        DE_NULL,                                                // const VkSparseImageOpaqueMemoryBindInfo*    pImageOpaqueBinds;
        0u,                                                     // uint32_t                                    imageBindCount;
        DE_NULL,                                                // const VkSparseImageMemoryBindInfo*          pImageBinds;
        0u,                                                     // uint32_t                                    signalSemaphoreCount;
        DE_NULL,                                                // const VkSemaphore*                          pSignalSemaphores;
    };

    const Unique<VkFence> fence(createFence(vk, device));

    VK_CHECK(vk.queueBindSparse(sparseQueue, 1u, &bindInfo, *fence));
    VK_CHECK(vk.waitForFences(device, 1u, &fence.get(), VK_TRUE, ~0ull));
}

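// Illustrative call site (a sketch, not part of the original test source): bind the memory
// produced by SparseAllocationBuilder::build() to a sparse buffer before any queue uses it.
// "sparseBuffer" and "sparseAllocation" are assumed to exist in the caller.
//
//   bindSparseBuffer(vk, device, m_sparseQueue.queueHandle, *sparseBuffer, *sparseAllocation,
//                    false /*useDeviceGroups*/, 0u /*resourceDevId*/, 0u /*memoryDeviceId*/);
//
// Because the helper waits on the fence signalled by vkQueueBindSparse, the binds have
// completed by the time it returns, so subsequently submitted work may access the buffer.
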
625 class SparseBufferTestInstance : public SparseResourcesBaseInstance, Renderer::Delegate
626 {
627 public:
628         SparseBufferTestInstance (Context& context, const TestFlags flags)
629                 : SparseResourcesBaseInstance   (context, (flags & TEST_FLAG_ENABLE_DEVICE_GROUPS) != 0)
630                 , m_aliased                                             ((flags & TEST_FLAG_ALIASED)   != 0)
631                 , m_residency                                   ((flags & TEST_FLAG_RESIDENCY) != 0)
632                 , m_nonResidentStrict                   ((flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0)
633                 , m_renderSize                                  (RENDER_SIZE, RENDER_SIZE)
634                 , m_colorFormat                                 (VK_FORMAT_R8G8B8A8_UNORM)
635                 , m_colorBufferSize                             (m_renderSize.x() * m_renderSize.y() * tcu::getPixelSize(mapVkFormat(m_colorFormat)))
636         {
637                 {
638                         QueueRequirementsVec requirements;
639                         requirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
640                         requirements.push_back(QueueRequirements(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 1u));
641
642                         createDeviceSupportingQueues(requirements);
643                 }
644
645                 const DeviceInterface& vk = getDeviceInterface();
646
647                 m_sparseQueue                                   = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0u);
648                 m_universalQueue                                = getQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 0u);
649
650                 m_sharedQueueFamilyIndices[0]   = m_sparseQueue.queueFamilyIndex;
651                 m_sharedQueueFamilyIndices[1]   = m_universalQueue.queueFamilyIndex;
652
653                 m_colorBuffer                                   = makeBuffer(vk, getDevice(), m_colorBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
654                 m_colorBufferAlloc                              = bindBuffer(vk, getDevice(), getAllocator(), *m_colorBuffer, MemoryRequirement::HostVisible);
655
656                 deMemset(m_colorBufferAlloc->getHostPtr(), 0, static_cast<std::size_t>(m_colorBufferSize));
657                 flushAlloc(vk, getDevice(), *m_colorBufferAlloc);
658         }
659
660 protected:
661         VkBufferCreateInfo getSparseBufferCreateInfo (const VkBufferUsageFlags usage) const
662         {
663                 VkBufferCreateFlags     flags = VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
664                 if (m_residency)
665                         flags |= VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
666                 if (m_aliased)
667                         flags |= VK_BUFFER_CREATE_SPARSE_ALIASED_BIT;
668
669                 VkBufferCreateInfo referenceBufferCreateInfo =
670                 {
671                         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,                           // VkStructureType        sType;
672                         DE_NULL,                                                                                        // const void*            pNext;
673                         flags,                                                                                          // VkBufferCreateFlags    flags;
674                         0u,     // override later                                                               // VkDeviceSize           size;
675                         VK_BUFFER_USAGE_TRANSFER_DST_BIT | usage,                       // VkBufferUsageFlags     usage;
676                         VK_SHARING_MODE_EXCLUSIVE,                                                      // VkSharingMode          sharingMode;
677                         0u,                                                                                                     // uint32_t               queueFamilyIndexCount;
678                         DE_NULL,                                                                                        // const uint32_t*        pQueueFamilyIndices;
679                 };
680
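                // If the sparse-binding queue and the universal queue belong to different families, the buffer must allow concurrent access from both, hence VK_SHARING_MODE_CONCURRENT below.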
681                 if (m_sparseQueue.queueFamilyIndex != m_universalQueue.queueFamilyIndex)
682                 {
683                         referenceBufferCreateInfo.sharingMode                   = VK_SHARING_MODE_CONCURRENT;
684                         referenceBufferCreateInfo.queueFamilyIndexCount = DE_LENGTH_OF_ARRAY(m_sharedQueueFamilyIndices);
685                         referenceBufferCreateInfo.pQueueFamilyIndices   = m_sharedQueueFamilyIndices;
686                 }
687
688                 return referenceBufferCreateInfo;
689         }
690
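        //! Render with the test's "vert"/"frag" programs at the given topology; the result lands in m_colorBuffer for later verification.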
691         void draw (const VkPrimitiveTopology    topology,
692                            const VkDescriptorSetLayout  descriptorSetLayout     = DE_NULL,
693                            Renderer::SpecializationMap  specMap                         = Renderer::SpecializationMap(),
694                            bool                                                 useDeviceGroups         = false,
695                            deUint32                                             deviceID                        = 0)
696         {
697                 const UniquePtr<Renderer> renderer(new Renderer(
698                         getDeviceInterface(), getDevice(), getAllocator(), m_universalQueue.queueFamilyIndex, descriptorSetLayout,
699                         m_context.getBinaryCollection(), "vert", "frag", *m_colorBuffer, m_renderSize, m_colorFormat, Vec4(1.0f, 0.0f, 0.0f, 1.0f), topology, specMap));
700
701                 renderer->draw(getDeviceInterface(), getDevice(), m_universalQueue.queueHandle, *this, useDeviceGroups, deviceID);
702         }
703
704         bool isResultImageCorrect (void) const
705         {
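                // Read back the rendered image from the host-visible color buffer and scan it for error pixels.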
706                 invalidateAlloc(getDeviceInterface(), getDevice(), *m_colorBufferAlloc);
707
708                 const tcu::ConstPixelBufferAccess resultImage (mapVkFormat(m_colorFormat), m_renderSize.x(), m_renderSize.y(), 1u, m_colorBufferAlloc->getHostPtr());
709
710                 m_context.getTestContext().getLog()
711                         << tcu::LogImageSet("Result", "Result") << tcu::LogImage("color0", "", resultImage) << tcu::TestLog::EndImageSet;
712
713                 return !imageHasErrorPixels(resultImage);
714         }
715
716         const bool                                                      m_aliased;
717         const bool                                                      m_residency;
718         const bool                                                      m_nonResidentStrict;
719
720         Queue                                                           m_sparseQueue;
721         Queue                                                           m_universalQueue;
722
723 private:
724         const IVec2                                                     m_renderSize;
725         const VkFormat                                          m_colorFormat;
726         const VkDeviceSize                                      m_colorBufferSize;
727
728         Move<VkBuffer>                                          m_colorBuffer;
729         MovePtr<Allocation>                                     m_colorBufferAlloc;
730
731         deUint32                                                        m_sharedQueueFamilyIndices[2];
732 };
733
734 void initProgramsDrawWithUBO (vk::SourceCollections& programCollection, const TestFlags flags)
735 {
736         // Vertex shader
737         {
738                 std::ostringstream src;
739                 src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
740                         << "\n"
741                         << "layout(location = 0) in vec4 in_position;\n"
742                         << "\n"
743                         << "out gl_PerVertex {\n"
744                         << "    vec4 gl_Position;\n"
745                         << "};\n"
746                         << "\n"
747                         << "void main(void)\n"
748                         << "{\n"
749                         << "    gl_Position = in_position;\n"
750                         << "}\n";
751
752                 programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
753         }
754
755         // Fragment shader
756         {
757                 const bool                      aliased                         = (flags & TEST_FLAG_ALIASED) != 0;
758                 const bool                      residency                       = (flags & TEST_FLAG_RESIDENCY) != 0;
759                 const bool                      nonResidentStrict       = (flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0;
760                 const std::string       valueExpr                       = (aliased ? "ivec4(3*(ndx % nonAliasedSize) ^ 127, 0, 0, 0)" : "ivec4(3*ndx ^ 127, 0, 0, 0)");
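                // The expected values mirror the 3*i ^ 127 pattern written by the staging upload; with aliasing, the last chunk shares memory with the first, so its indices wrap into the non-aliased range.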
761
762                 std::ostringstream src;
763                 src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
764                         << "\n"
765                         << "layout(location = 0) out vec4 o_color;\n"
766                         << "\n"
767                         << "layout(constant_id = 1) const int dataSize  = 1;\n"
768                         << "layout(constant_id = 2) const int chunkSize = 1;\n"
769                         << "\n"
770                         << "layout(set = 0, binding = 0, std140) uniform SparseBuffer {\n"
771                         << "    ivec4 data[dataSize];\n"
772                         << "} ubo;\n"
773                         << "\n"
774                         << "void main(void)\n"
775                         << "{\n"
776                         << "    const int fragNdx        = int(gl_FragCoord.x) + " << RENDER_SIZE << " * int(gl_FragCoord.y);\n"
777                         << "    const int pageSize       = " << RENDER_SIZE << " * " << RENDER_SIZE << ";\n"
778                         << "    const int numChunks      = dataSize / chunkSize;\n";
779
780                 if (aliased)
781                         src << "    const int nonAliasedSize = (numChunks > 1 ? dataSize - chunkSize : dataSize);\n";
782
783                 src << "    bool      ok             = true;\n"
784                         << "\n"
785                         << "    for (int ndx = fragNdx; ndx < dataSize; ndx += pageSize)\n"
786                         << "    {\n";
787
788                 if (residency && nonResidentStrict)
789                 {
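                        // The hole chunk is never bound; with residencyNonResidentStrict, reads from it must return zero.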
790                         src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
791                                 << "            ok = ok && (ubo.data[ndx] == ivec4(0));\n"
792                                 << "        else\n"
793                                 << "            ok = ok && (ubo.data[ndx] == " + valueExpr + ");\n";
794                 }
795                 else if (residency)
796                 {
797                         src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
798                                 << "            continue;\n"
799                                 << "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";
800                 }
801                 else
802                         src << "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";
803
804                 src << "    }\n"
805                         << "\n"
806                         << "    if (ok)\n"
807                         << "        o_color = vec4(0.0, 1.0, 0.0, 1.0);\n"
808                         << "    else\n"
809                         << "        o_color = vec4(1.0, 0.0, 0.0, 1.0);\n"
810                         << "}\n";
811
812                 programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
813         }
814 }
815
816 //! Sparse buffer backing a UBO
817 class UBOTestInstance : public SparseBufferTestInstance
818 {
819 public:
820         UBOTestInstance (Context& context, const TestFlags flags)
821                 : SparseBufferTestInstance      (context, flags)
822         {
823         }
824
825         void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
826         {
827                 const DeviceInterface&  vk                              = getDeviceInterface();
828                 const VkDeviceSize              vertexOffset    = 0ull;
829
830                 vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
831                 vk.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
832                 vk.cmdDraw                              (cmdBuffer, 4u, 1u, 0u, 0u);
833         }
834
835         tcu::TestStatus iterate (void)
836         {
837                 const InstanceInterface&        instance                        = m_context.getInstanceInterface();
838                 const DeviceInterface&          vk                                      = getDeviceInterface();
839                 MovePtr<SparseAllocation>       sparseAllocation;
840                 Move<VkBuffer>                          sparseBuffer;
841                 Move<VkBuffer>                          sparseBufferAliased;
842                 bool                                            setupDescriptors        = true;
843
844                 // Go through all physical devices
845                 for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
846                 {
847                         const deUint32  firstDeviceID   = physDevID;
848                         const deUint32  secondDeviceID  = (firstDeviceID + 1) % m_numPhysicalDevices;
849
850                         // Set up the sparse buffer
851                         {
852                                 VkBufferCreateInfo      referenceBufferCreateInfo       = getSparseBufferCreateInfo(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
853                                 const VkDeviceSize      minChunkSize                            = 512u; // make sure the smallest allocation is at least this big
854                                 deUint32                        numMaxChunks                            = 0u;
855
856                                 // Check how many chunks we can allocate given the alignment and size requirements of UBOs
857                                 {
858                                         const UniquePtr<SparseAllocation> minAllocation(SparseAllocationBuilder()
859                                                 .addMemoryBind()
860                                                 .build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize));
861
862                                         numMaxChunks = deMaxu32(static_cast<deUint32>(m_context.getDeviceProperties().limits.maxUniformBufferRange / minAllocation->resourceSize), 1u);
863                                 }
864
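                                // If fewer than four chunks fit within maxUniformBufferRange, fall back to a trivial single-chunk allocation.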
865                                 if (numMaxChunks < 4)
866                                 {
867                                         sparseAllocation = SparseAllocationBuilder()
868                                                 .addMemoryBind()
869                                                 .build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize);
870                                 }
871                                 else
872                                 {
873                                         // Try to use a non-trivial memory allocation scheme to make it different from a non-sparse binding
874                                         SparseAllocationBuilder builder;
875                                         builder.addMemoryBind();
876
877                                         if (m_residency)
878                                                 builder.addResourceHole();
879
880                                         builder
881                                                 .addMemoryAllocation()
882                                                 .addMemoryHole()
883                                                 .addMemoryBind();
884
885                                         if (m_aliased)
886                                                 builder.addAliasedMemoryBind(0u, 0u);
887
888                                         sparseAllocation = builder.build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize);
889                                         DE_ASSERT(sparseAllocation->resourceSize <= m_context.getDeviceProperties().limits.maxUniformBufferRange);
890                                 }
891
892                                 if (firstDeviceID != secondDeviceID)
893                                 {
894                                         VkPeerMemoryFeatureFlags        peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
895                                         vk.getDeviceGroupPeerMemoryFeatures(getDevice(), sparseAllocation->heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);
896
897                                         if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT)    == 0) ||
898                                                 ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT) == 0))
899                                         {
900                                                 TCU_THROW(NotSupportedError, "Peer memory does not support COPY_DST and GENERIC_SRC");
901                                         }
902                                 }
903
904                                 // Create the buffer
905                                 referenceBufferCreateInfo.size  = sparseAllocation->resourceSize;
906                                 sparseBuffer                                    = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
907                                 bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBuffer, *sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
908
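                                // With aliasing, bind a second buffer to the same SparseAllocation; the UBO descriptor later reads through this aliased buffer while the copy below writes through the original one.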
909                                 if (m_aliased)
910                                 {
911                                         sparseBufferAliased = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
912                                         bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBufferAliased, *sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
913                                 }
914                         }
915
916                         // Set uniform data
917                         {
918                                 const bool                                      hasAliasedChunk         = (m_aliased && sparseAllocation->memoryBinds.size() > 1u);
919                                 const VkDeviceSize                      chunkSize                       = sparseAllocation->resourceSize / sparseAllocation->numResourceChunks;
920                                 const VkDeviceSize                      stagingBufferSize       = sparseAllocation->resourceSize - (hasAliasedChunk ? chunkSize : 0);
921                                 const deUint32                          numBufferEntries        = static_cast<deUint32>(stagingBufferSize / sizeof(IVec4));
922
923                                 const Unique<VkBuffer>          stagingBuffer           (makeBuffer(vk, getDevice(), stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT));
924                                 const UniquePtr<Allocation>     stagingBufferAlloc      (bindBuffer(vk, getDevice(), getAllocator(), *stagingBuffer, MemoryRequirement::HostVisible));
925
926                                 {
927                                         // If an aliased chunk is used, the staging buffer is smaller than the sparse buffer and we don't overwrite the last chunk
928                                         IVec4* const pData = static_cast<IVec4*>(stagingBufferAlloc->getHostPtr());
929                                         for (deUint32 i = 0; i < numBufferEntries; ++i)
930                                                 pData[i] = IVec4(3*i ^ 127, 0, 0, 0);
931
932                                         flushAlloc(vk, getDevice(), *stagingBufferAlloc);
933
934                                         const VkBufferCopy copyRegion =
935                                         {
936                                                 0ull,                                           // VkDeviceSize    srcOffset;
937                                                 0ull,                                           // VkDeviceSize    dstOffset;
938                                                 stagingBufferSize,                      // VkDeviceSize    size;
939                                         };
940
941                                         const Unique<VkCommandPool>             cmdPool         (makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
942                                         const Unique<VkCommandBuffer>   cmdBuffer       (allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
943
944                                         beginCommandBuffer      (vk, *cmdBuffer);
945                                         vk.cmdCopyBuffer        (*cmdBuffer, *stagingBuffer, *sparseBuffer, 1u, &copyRegion);
946                                         endCommandBuffer        (vk, *cmdBuffer);
947
948                                         submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL, DE_NULL, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
949                                         // Once the fence is signaled, the write is also available to the aliasing buffer.
950                                 }
951                         }
952
953                         // Make sure that we don't try to access a larger range than is allowed. This only applies to the single-chunk case.
954                         const deUint32 maxBufferRange = deMinu32(static_cast<deUint32>(sparseAllocation->resourceSize), m_context.getDeviceProperties().limits.maxUniformBufferRange);
955
956                         // Descriptor sets
957                         {
958                                 // Setup only once
959                                 if (setupDescriptors)
960                                 {
961                                         m_descriptorSetLayout = DescriptorSetLayoutBuilder()
962                                                 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT)
963                                                 .build(vk, getDevice());
964
965                                         m_descriptorPool = DescriptorPoolBuilder()
966                                                 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
967                                                 .build(vk, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
968
969                                         m_descriptorSet = makeDescriptorSet(vk, getDevice(), *m_descriptorPool, *m_descriptorSetLayout);
970                                         setupDescriptors = false;
971                                 }
972
973                                 const VkBuffer                                  buffer                          = (m_aliased ? *sparseBufferAliased : *sparseBuffer);
974                                 const VkDescriptorBufferInfo    sparseBufferInfo        = makeDescriptorBufferInfo(buffer, 0ull, maxBufferRange);
975
976                                 DescriptorSetUpdateBuilder()
977                                         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &sparseBufferInfo)
978                                         .update(vk, getDevice());
979                         }
980
981                         // Vertex data
982                         {
983                                 const Vec4 vertexData[] =
984                                 {
985                                         Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
986                                         Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
987                                         Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
988                                         Vec4( 1.0f,  1.0f, 0.0f, 1.0f),
989                                 };
990
991                                 const VkDeviceSize      vertexBufferSize        = sizeof(vertexData);
992
993                                 m_vertexBuffer          = makeBuffer(vk, getDevice(), vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
994                                 m_vertexBufferAlloc     = bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);
995
996                                 deMemcpy(m_vertexBufferAlloc->getHostPtr(), &vertexData[0], vertexBufferSize);
997                                 flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
998                         }
999
1000                         // Draw
1001                         {
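                                // Specialization constants feed the fragment shader: constant 1 is the number of ivec4 entries visible through the UBO, constant 2 is the number of entries per chunk.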
1002                                 std::vector<deInt32> specializationData;
1003                                 {
1004                                         const deUint32  numBufferEntries        = maxBufferRange / static_cast<deUint32>(sizeof(IVec4));
1005                                         const deUint32  numEntriesPerChunk      = numBufferEntries / sparseAllocation->numResourceChunks;
1006
1007                                         specializationData.push_back(numBufferEntries);
1008                                         specializationData.push_back(numEntriesPerChunk);
1009                                 }
1010
1011                                 const VkSpecializationMapEntry  specMapEntries[] =
1012                                 {
1013                                         {
1014                                                 1u,                                     // uint32_t    constantID;
1015                                                 0u,                                     // uint32_t    offset;
1016                                                 sizeof(deInt32),        // size_t      size;
1017                                         },
1018                                         {
1019                                                 2u,                                     // uint32_t    constantID;
1020                                                 sizeof(deInt32),        // uint32_t    offset;
1021                                                 sizeof(deInt32),        // size_t      size;
1022                                         },
1023                                 };
1024
1025                                 const VkSpecializationInfo specInfo =
1026                                 {
1027                                         DE_LENGTH_OF_ARRAY(specMapEntries),             // uint32_t                           mapEntryCount;
1028                                         specMapEntries,                                                 // const VkSpecializationMapEntry*    pMapEntries;
1029                                         sizeInBytes(specializationData),                // size_t                             dataSize;
1030                                         getDataOrNullptr(specializationData),   // const void*                        pData;
1031                                 };
1032
1033                                 Renderer::SpecializationMap     specMap;
1034                                 specMap[VK_SHADER_STAGE_FRAGMENT_BIT] = &specInfo;
1035
1036                                 draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, *m_descriptorSetLayout, specMap, usingDeviceGroups(), firstDeviceID);
1037                         }
1038
1039                         if (!isResultImageCorrect())
1040                                 return tcu::TestStatus::fail("Some buffer values were incorrect");
1041                 }
1042                 return tcu::TestStatus::pass("Pass");
1043         }
1044
1045 private:
1046         Move<VkBuffer>                                  m_vertexBuffer;
1047         MovePtr<Allocation>                             m_vertexBufferAlloc;
1048
1049         Move<VkDescriptorSetLayout>             m_descriptorSetLayout;
1050         Move<VkDescriptorPool>                  m_descriptorPool;
1051         Move<VkDescriptorSet>                   m_descriptorSet;
1052 };
1053
1054 void initProgramsDrawGrid (vk::SourceCollections& programCollection, const TestFlags flags)
1055 {
1056         DE_UNREF(flags);
1057
1058         // Vertex shader
1059         {
1060                 std::ostringstream src;
1061                 src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1062                         << "\n"
1063                         << "layout(location = 0) in  vec4 in_position;\n"
1064                         << "layout(location = 0) out int  out_ndx;\n"
1065                         << "\n"
1066                         << "out gl_PerVertex {\n"
1067                         << "    vec4 gl_Position;\n"
1068                         << "};\n"
1069                         << "\n"
1070                         << "void main(void)\n"
1071                         << "{\n"
1072                         << "    gl_Position = in_position;\n"
1073                         << "    out_ndx     = gl_VertexIndex;\n"
1074                         << "}\n";
1075
1076                 programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
1077         }
1078
1079         // Fragment shader
1080         {
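                // Shade triangles by the parity of the flat (provoking-vertex) index so adjacent primitives alternate between white and light grey.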
1081                 std::ostringstream src;
1082                 src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1083                         << "\n"
1084                         << "layout(location = 0) flat in  int  in_ndx;\n"
1085                         << "layout(location = 0)      out vec4 o_color;\n"
1086                         << "\n"
1087                         << "void main(void)\n"
1088                         << "{\n"
1089                         << "    if (in_ndx % 2 == 0)\n"
1090                         << "        o_color = vec4(vec3(1.0), 1.0);\n"
1091                         << "    else\n"
1092                         << "        o_color = vec4(vec3(0.75), 1.0);\n"
1093                         << "}\n";
1094
1095                 programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
1096         }
1097 }
1098
1099 //! Generate vertex positions for a grid of tiles composed of two triangles each (6 vertices)
1100 void generateGrid (void* pRawData, const float step, const float ox, const float oy, const deUint32 numX, const deUint32 numY, const float z = 0.0f)
1101 {
1102         typedef Vec4 (*TilePtr)[6];
1103
1104         TilePtr const pData = static_cast<TilePtr>(pRawData);
1105         {
1106                 for (deUint32 iy = 0; iy < numY; ++iy)
1107                 for (deUint32 ix = 0; ix < numX; ++ix)
1108                 {
1109                         const deUint32  ndx     = ix + numX * iy;
1110                         const float             x       = ox + step * static_cast<float>(ix);
1111                         const float             y       = oy + step * static_cast<float>(iy);
1112
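                        // Each tile is split into two triangles that share the tile's diagonal.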
1113                         pData[ndx][0] = Vec4(x + step,  y,                      z, 1.0f);
1114                         pData[ndx][1] = Vec4(x,                 y,                      z, 1.0f);
1115                         pData[ndx][2] = Vec4(x,                 y + step,       z, 1.0f);
1116
1117                         pData[ndx][3] = Vec4(x,                 y + step,       z, 1.0f);
1118                         pData[ndx][4] = Vec4(x + step,  y + step,       z, 1.0f);
1119                         pData[ndx][5] = Vec4(x + step,  y,                      z, 1.0f);
1120                 }
1121         }
1122 }
1123
1124 //! Base test for a sparse buffer backing a vertex/index buffer
1125 class DrawGridTestInstance : public SparseBufferTestInstance
1126 {
1127 public:
1128         DrawGridTestInstance (Context& context, const TestFlags flags, const VkBufferUsageFlags usage, const VkDeviceSize minChunkSize)
1129                 : SparseBufferTestInstance      (context, flags)
1130                 , m_bufferUsage                         (usage)
1131                 , m_minChunkSize                        (minChunkSize)
1132                 , m_perDrawBufferOffset         (0)
1133                 , m_stagingBufferSize           (0)
1134         {
1135         }
1136
1137         void createResources (deUint32 memoryDeviceIndex)
1138         {
1139                 const InstanceInterface&        instance                                        = m_context.getInstanceInterface();
1140                 const DeviceInterface&          vk                                                      = getDeviceInterface();
1141                 VkBufferCreateInfo                      referenceBufferCreateInfo       = getSparseBufferCreateInfo(m_bufferUsage);
1142
1143                 {
1144                         // Allocate two chunks, each covering half of the viewport
1145                         SparseAllocationBuilder builder;
1146                         builder.addMemoryBind();
1147
1148                         if (m_residency)
1149                                 builder.addResourceHole();
1150
1151                         builder
1152                                 .addMemoryAllocation()
1153                                 .addMemoryHole()
1154                                 .addMemoryBind();
1155
1156                         if (m_aliased)
1157                                 builder.addAliasedMemoryBind(0u, 0u);
1158
1159                         m_sparseAllocation      = builder.build(instance, getPhysicalDevice(memoryDeviceIndex), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, m_minChunkSize);
1160                 }
1161
1162                 // Create the buffer
1163                 referenceBufferCreateInfo.size  = m_sparseAllocation->resourceSize;
1164                 m_sparseBuffer                                  = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
1165
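                // One chunk is consumed per draw call; the staging buffer holds the data for both draws back to back.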
1166                 m_perDrawBufferOffset   = m_sparseAllocation->resourceSize / m_sparseAllocation->numResourceChunks;
1167                 m_stagingBufferSize             = 2 * m_perDrawBufferOffset;
1168                 m_stagingBuffer                 = makeBuffer(vk, getDevice(), m_stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
1169                 m_stagingBufferAlloc    = bindBuffer(vk, getDevice(), getAllocator(), *m_stagingBuffer, MemoryRequirement::HostVisible);
1170         }
1171
1172         tcu::TestStatus iterate (void)
1173         {
1174                 const DeviceInterface&  vk      = getDeviceInterface();
1175
1176                 for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
1177                 {
1178                         const deUint32  firstDeviceID   = physDevID;
1179                         const deUint32  secondDeviceID  = (firstDeviceID + 1) % m_numPhysicalDevices;
1180
1181                         createResources(secondDeviceID);
1182
1183                         if (firstDeviceID != secondDeviceID)
1184                         {
1185                                 VkPeerMemoryFeatureFlags        peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
1186                                 vk.getDeviceGroupPeerMemoryFeatures(getDevice(), m_sparseAllocation->heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);
1187
1188                                 if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT)    == 0) ||
1189                                         ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT) == 0))
1190                                 {
1191                                         TCU_THROW(NotSupportedError, "Peer memory does not support COPY_DST and GENERIC_SRC");
1192                                 }
1193                         }
1194
1195                         // Bind the memory
1196                         bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *m_sparseBuffer, *m_sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
1197
1198                         initializeBuffers();
1199
1200                         // Upload to the sparse buffer
1201                         {
1202                                 flushAlloc(vk, getDevice(), *m_stagingBufferAlloc);
1203
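                                // With residency, an unbound hole chunk sits between the two data chunks, so the second chunk starts one chunk further in. With aliasing, the last chunk is bound to the same memory as the first, so writing through it also fills the first chunk.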
1204                                 VkDeviceSize    firstChunkOffset        = 0ull;
1205                                 VkDeviceSize    secondChunkOffset       = m_perDrawBufferOffset;
1206
1207                                 if (m_residency)
1208                                         secondChunkOffset += m_perDrawBufferOffset;
1209
1210                                 if (m_aliased)
1211                                         firstChunkOffset = secondChunkOffset + m_perDrawBufferOffset;
1212
1213                                 const VkBufferCopy copyRegions[] =
1214                                 {
1215                                         {
1216                                                 0ull,                                           // VkDeviceSize    srcOffset;
1217                                                 firstChunkOffset,                       // VkDeviceSize    dstOffset;
1218                                                 m_perDrawBufferOffset,          // VkDeviceSize    size;
1219                                         },
1220                                         {
1221                                                 m_perDrawBufferOffset,          // VkDeviceSize    srcOffset;
1222                                                 secondChunkOffset,                      // VkDeviceSize    dstOffset;
1223                                                 m_perDrawBufferOffset,          // VkDeviceSize    size;
1224                                         },
1225                                 };
1226
1227                                 const Unique<VkCommandPool>             cmdPool         (makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
1228                                 const Unique<VkCommandBuffer>   cmdBuffer       (allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1229
1230                                 beginCommandBuffer      (vk, *cmdBuffer);
1231                                 vk.cmdCopyBuffer        (*cmdBuffer, *m_stagingBuffer, *m_sparseBuffer, DE_LENGTH_OF_ARRAY(copyRegions), copyRegions);
1232                                 endCommandBuffer        (vk, *cmdBuffer);
1233
1234                                 submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL, DE_NULL, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
1235                         }
1236
1237
1238                         Renderer::SpecializationMap     specMap;
1239                         draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, DE_NULL, specMap, usingDeviceGroups(), firstDeviceID);
1240
1241                         if (!isResultImageCorrect())
1242                                 return tcu::TestStatus::fail("Some buffer values were incorrect");
1243                 }
1244                 return tcu::TestStatus::pass("Pass");
1245         }
1246
1247 protected:
1248         virtual void                            initializeBuffers               (void) = 0;
1249
1250         const VkBufferUsageFlags        m_bufferUsage;
1251         const VkDeviceSize                      m_minChunkSize;
1252
1253         VkDeviceSize                            m_perDrawBufferOffset;
1254
1255         VkDeviceSize                            m_stagingBufferSize;
1256         Move<VkBuffer>                          m_stagingBuffer;
1257         MovePtr<Allocation>                     m_stagingBufferAlloc;
1258
1259         MovePtr<SparseAllocation>       m_sparseAllocation;
1260         Move<VkBuffer>                          m_sparseBuffer;
1261 };
1262
1263 //! Sparse buffer backing a vertex input buffer
1264 class VertexBufferTestInstance : public DrawGridTestInstance
1265 {
1266 public:
1267         VertexBufferTestInstance (Context& context, const TestFlags flags)
1268                 : DrawGridTestInstance  (context,
1269                                                                  flags,
1270                                                                  VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
1271                                                                  GRID_SIZE * GRID_SIZE * 6 * sizeof(Vec4))
1272         {
1273         }
1274
1275         void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
1276         {
1277                 DE_UNREF(pipelineLayout);
1278
1279                 m_context.getTestContext().getLog()
1280                         << tcu::TestLog::Message << "Drawing a grid of triangles backed by a sparse vertex buffer. There should be no red pixels visible." << tcu::TestLog::EndMessage;
1281
1282                 const DeviceInterface&  vk                              = getDeviceInterface();
1283                 const deUint32                  vertexCount             = 6 * (GRID_SIZE * GRID_SIZE) / 2;
1284                 VkDeviceSize                    vertexOffset    = 0ull;
1285
1286                 vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
1287                 vk.cmdDraw                              (cmdBuffer, vertexCount, 1u, 0u, 0u);
1288
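                // The second draw reads the second data chunk; with residency an unbound hole chunk sits in between and must be skipped.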
1289                 vertexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);
1290
1291                 vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
1292                 vk.cmdDraw                              (cmdBuffer, vertexCount, 1u, 0u, 0u);
1293         }
1294
1295         void initializeBuffers (void)
1296         {
1297                 deUint8*        pData   = static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr());
1298                 const float     step    = 2.0f / static_cast<float>(GRID_SIZE);
1299
1300                 // Prepare data for two draw calls
1301                 generateGrid(pData,                                                     step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE/2);
1302                 generateGrid(pData + m_perDrawBufferOffset,     step, -1.0f,  0.0f, GRID_SIZE, GRID_SIZE/2);
1303         }
1304 };
1305
1306 //! Sparse buffer backing an index buffer
1307 class IndexBufferTestInstance : public DrawGridTestInstance
1308 {
1309 public:
1310         IndexBufferTestInstance (Context& context, const TestFlags flags)
1311                 : DrawGridTestInstance  (context,
1312                                                                  flags,
1313                                                                  VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
1314                                                                  GRID_SIZE * GRID_SIZE * 6 * sizeof(deUint32))
1315                 , m_halfVertexCount             (6 * (GRID_SIZE * GRID_SIZE) / 2)
1316         {
1317         }
1318
1319         void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
1320         {
1321                 DE_UNREF(pipelineLayout);
1322
1323                 m_context.getTestContext().getLog()
1324                         << tcu::TestLog::Message << "Drawing a grid of triangles from a sparse index buffer. There should be no red pixels visible." << tcu::TestLog::EndMessage;
1325
1326                 const DeviceInterface&  vk                              = getDeviceInterface();
1327                 const VkDeviceSize              vertexOffset    = 0ull;
1328                 VkDeviceSize                    indexOffset             = 0ull;
1329
1330                 vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
1331
1332                 vk.cmdBindIndexBuffer   (cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
1333                 vk.cmdDrawIndexed               (cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);
1334
1335                 indexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);
1336
1337                 vk.cmdBindIndexBuffer   (cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
1338                 vk.cmdDrawIndexed               (cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);
1339         }
1340
1341         void initializeBuffers (void)
1342         {
1343                 // Vertex buffer
1344                 const DeviceInterface&  vk                                      = getDeviceInterface();
1345                 const VkDeviceSize              vertexBufferSize        = 2 * m_halfVertexCount * sizeof(Vec4);
1346                                                                 m_vertexBuffer          = makeBuffer(vk, getDevice(), vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
1347                                                                 m_vertexBufferAlloc     = bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);
1348
1349                 {
1350                         const float     step = 2.0f / static_cast<float>(GRID_SIZE);
1351
1352                         generateGrid(m_vertexBufferAlloc->getHostPtr(), step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE);
1353
1354                         flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
1355                 }
1356
1357                 // Sparse index buffer
1358                 for (deUint32 chunkNdx = 0u; chunkNdx < 2; ++chunkNdx)
1359                 {
1360                         deUint8* const  pData           = static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
1361                         deUint32* const pIndexData      = reinterpret_cast<deUint32*>(pData);
1362                         const deUint32  ndxBase         = chunkNdx * m_halfVertexCount;
1363
1364                         for (deUint32 i = 0u; i < m_halfVertexCount; ++i)
1365                                 pIndexData[i] = ndxBase + i;
1366                 }
1367         }
1368
1369 private:
1370         const deUint32                  m_halfVertexCount;
1371         Move<VkBuffer>                  m_vertexBuffer;
1372         MovePtr<Allocation>             m_vertexBufferAlloc;
1373 };
1374
1375 //! Draw from a sparse indirect buffer
1376 class IndirectBufferTestInstance : public DrawGridTestInstance
1377 {
1378 public:
1379         IndirectBufferTestInstance (Context& context, const TestFlags flags)
1380                 : DrawGridTestInstance  (context,
1381                                                                  flags,
1382                                                                  VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
1383                                                                  sizeof(VkDrawIndirectCommand))
1384         {
1385         }
1386
1387         void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
1388         {
1389                 DE_UNREF(pipelineLayout);
1390
1391                 m_context.getTestContext().getLog()
1392                         << tcu::TestLog::Message << "Drawing two triangles covering the whole viewport. There should be no red pixels visible." << tcu::TestLog::EndMessage;
1393
1394                 const DeviceInterface&  vk                              = getDeviceInterface();
1395                 const VkDeviceSize              vertexOffset    = 0ull;
1396                 VkDeviceSize                    indirectOffset  = 0ull;
1397
1398                 vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
1399                 vk.cmdDrawIndirect              (cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);
1400
1401                 indirectOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);
1402
1403                 vk.cmdDrawIndirect              (cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);
1404         }
1405
1406         void initializeBuffers (void)
1407         {
1408                 // Vertex buffer
1409                 const DeviceInterface&  vk                                      = getDeviceInterface();
1410                 const VkDeviceSize              vertexBufferSize        = 2 * 3 * sizeof(Vec4);
1411                                                                 m_vertexBuffer          = makeBuffer(vk, getDevice(), vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
1412                                                                 m_vertexBufferAlloc     = bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);
1413
1414                 {
1415                         generateGrid(m_vertexBufferAlloc->getHostPtr(), 2.0f, -1.0f, -1.0f, 1, 1);
1416                         flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
1417                 }
1418
1419                 // Indirect buffer
1420                 for (deUint32 chunkNdx = 0u; chunkNdx < 2; ++chunkNdx)
1421                 {
1422                         deUint8* const                                  pData           = static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
1423                         VkDrawIndirectCommand* const    pCmdData        = reinterpret_cast<VkDrawIndirectCommand*>(pData);
1424
1425                         pCmdData->firstVertex   = 3u * chunkNdx;
1426                         pCmdData->firstInstance = 0u;
1427                         pCmdData->vertexCount   = 3u;
1428                         pCmdData->instanceCount = 1u;
1429                 }
1430         }
1431
1432 private:
1433         Move<VkBuffer>                  m_vertexBuffer;
1434         MovePtr<Allocation>             m_vertexBufferAlloc;
1435 };
1436
1437 //! Similar to the class in vktTestCaseUtil.hpp, but uses Arg0 directly rather than through an InstanceFunction1
1438 template<typename Arg0>
1439 class FunctionProgramsSimple1
1440 {
1441 public:
1442         typedef void    (*Function)                             (vk::SourceCollections& dst, Arg0 arg0);
1443                                         FunctionProgramsSimple1 (Function func) : m_func(func)                                                  {}
1444         void                    init                                    (vk::SourceCollections& dst, const Arg0& arg0) const    { m_func(dst, arg0); }
1445
1446 private:
1447         const Function  m_func;
1448 };
1449
1450 void checkSupport (Context& context, const TestFlags flags)
1451 {
1452         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
1453
1454         if (flags & TEST_FLAG_RESIDENCY)
1455                 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_BUFFER);
1456
1457         if (flags & TEST_FLAG_ALIASED)
1458                 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_ALIASED);
1459
1460         if (flags & TEST_FLAG_NON_RESIDENT_STRICT && !context.getDeviceProperties().sparseProperties.residencyNonResidentStrict)
1461                 TCU_THROW(NotSupportedError, "Missing sparse property: residencyNonResidentStrict");
1462 }
1463
1464 //! Convenience function to create a TestCase based on a freestanding initPrograms and a TestInstance implementation
1465 template<typename TestInstanceT, typename Arg0>
1466 TestCase* createTestInstanceWithPrograms (tcu::TestContext&                                                                     testCtx,
1467                                                                                   const std::string&                                                            name,
1468                                                                                   const std::string&                                                            desc,
1469                                                                                   typename FunctionProgramsSimple1<Arg0>::Function      initPrograms,
1470                                                                                   Arg0                                                                                          arg0)
1471 {
1472         return new InstanceFactory1WithSupport<TestInstanceT, Arg0, FunctionSupport1<Arg0>, FunctionProgramsSimple1<Arg0> >(
1473                 testCtx, tcu::NODETYPE_SELF_VALIDATE, name, desc, FunctionProgramsSimple1<Arg0>(initPrograms), arg0, typename FunctionSupport1<Arg0>::Args(checkSupport, arg0));
1474 }
1475
1476 void populateTestGroup (tcu::TestCaseGroup* parentGroup)
1477 {
1478         const struct
1479         {
1480                 std::string             name;
1481                 TestFlags               flags;
1482         } groups[] =
1483         {
1484                 { "sparse_binding",                                                                             0u,                                                                                                     },
1485                 { "sparse_binding_aliased",                                                             TEST_FLAG_ALIASED,                                                                      },
1486                 { "sparse_residency",                                                                   TEST_FLAG_RESIDENCY,                                                            },
1487                 { "sparse_residency_aliased",                                                   TEST_FLAG_RESIDENCY | TEST_FLAG_ALIASED,                        },
1488                 { "sparse_residency_non_resident_strict",                               TEST_FLAG_RESIDENCY | TEST_FLAG_NON_RESIDENT_STRICT,},
1489         };
1490
1491         const int numGroupsIncludingNonResidentStrict   = DE_LENGTH_OF_ARRAY(groups);
1492         const int numGroupsDefaultList                                  = numGroupsIncludingNonResidentStrict - 1;
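        // Only the UBO tests exercise the non_resident_strict variant; the remaining groups use the shorter default list.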
1493         std::string devGroupPrefix                                              = "device_group_";
1494
1495         // Transfer
1496         {
1497                 MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "transfer", ""));
1498                 {
1499                         MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding", ""));
1500                         addBufferSparseBindingTests(subGroup.get(), false);
1501                         group->addChild(subGroup.release());
1502
1503                         MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding", ""));
1504                         addBufferSparseBindingTests(subGroupDeviceGroups.get(), true);
1505                         group->addChild(subGroupDeviceGroups.release());
1506                 }
1507                 parentGroup->addChild(group.release());
1508         }
1509
1510         // SSBO
1511         {
1512                 MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ssbo", ""));
1513                 {
1514                         MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding_aliased", ""));
1515                         addBufferSparseMemoryAliasingTests(subGroup.get(), false);
1516                         group->addChild(subGroup.release());
1517
1518                         MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding_aliased", ""));
1519                         addBufferSparseMemoryAliasingTests(subGroupDeviceGroups.get(), true);
1520                         group->addChild(subGroupDeviceGroups.release());
1521                 }
1522                 {
1523                         MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_residency", ""));
1524                         addBufferSparseResidencyTests(subGroup.get(), false);
1525                         group->addChild(subGroup.release());
1526
1527                         MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_residency", ""));
1528                         addBufferSparseResidencyTests(subGroupDeviceGroups.get(), true);
1529                         group->addChild(subGroupDeviceGroups.release());
1530                 }
1531                 parentGroup->addChild(group.release());
1532         }
1533
1534         // UBO
1535         {
1536                 MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ubo", ""));
1537
1538                 for (int groupNdx = 0u; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
1539                 {
1540                         group->addChild(createTestInstanceWithPrograms<UBOTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawWithUBO, groups[groupNdx].flags));
1541                 }
1542                 for (int groupNdx = 0u; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
1543                 {
1544                         group->addChild(createTestInstanceWithPrograms<UBOTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawWithUBO, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
1545                 }
1546                 parentGroup->addChild(group.release());
1547         }
1548
1549         // Vertex buffer
1550         {
1551                 MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "vertex_buffer", ""));
1552
1553                 for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1554                 {
1555                         group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags));
1556                 }
1557                 for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1558                 {
1559                         group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
1560                 }
1561
1562                 parentGroup->addChild(group.release());
1563         }
1564
1565         // Index buffer
1566         {
1567                 MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "index_buffer", ""));
1568
1569                 for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1570                 {
1571                         group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags));
1572                 }
1573                 for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1574                 {
1575                         group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
1576                 }
1577
1578                 parentGroup->addChild(group.release());
1579         }
1580
1581         // Indirect buffer
1582         {
1583                 MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "indirect_buffer", ""));
1584
1585                 for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1586                 {
1587                         group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags));
1588                 }
1589                 for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
1590                 {
1591                         group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
1592                 }
1593
1594                 parentGroup->addChild(group.release());
1595         }
1596 }
1597
1598 } // anonymous ns
1599
1600 tcu::TestCaseGroup* createSparseBufferTests (tcu::TestContext& testCtx)
1601 {
1602         return createTestGroup(testCtx, "buffer", "Sparse buffer usage tests", populateTestGroup);
1603 }
1604
1605 } // sparse
1606 } // vkt