Limit changes by xor to upper 8 bits in mixed atomic tests am: 6bc3c7a634 am: eef2e71...
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / sparse_resources / vktSparseResourcesBufferMemoryAliasing.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file  vktSparseResourcesBufferMemoryAliasing.cpp
21  * \brief Sparse buffer memory aliasing tests
22  *//*--------------------------------------------------------------------*/
23
24 #include "vktSparseResourcesBufferMemoryAliasing.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
28
29 #include "vkDefs.hpp"
30 #include "vkRef.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vkMemUtil.hpp"
36 #include "vkQueryUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkTypeUtil.hpp"
39
40 #include "deStringUtil.hpp"
41 #include "deUniquePtr.hpp"
42
43 #include <string>
44 #include <vector>
45
46 using namespace vk;
47
48 namespace vkt
49 {
50 namespace sparse
51 {
52 namespace
53 {
54
// Constants shared between the generated compute shader and the host-side reference data.
enum ShaderParameters
{
	// Size in bytes of one element of the shader's "uint result[]" array.
	SIZE_OF_UINT_IN_SHADER = 4u,

	// The shader stores (index % MODULO_DIVISOR) into each element.
	MODULO_DIVISOR = 1024u
};
60
61 tcu::UVec3 computeWorkGroupSize (const deUint32 numInvocations)
62 {
63         const deUint32          maxComputeWorkGroupInvocations  = 128u;
64         const tcu::UVec3        maxComputeWorkGroupSize                 = tcu::UVec3(128u, 128u, 64u);
65         deUint32                        numInvocationsLeft                              = numInvocations;
66
67         const deUint32 xWorkGroupSize = std::min(std::min(numInvocationsLeft, maxComputeWorkGroupSize.x()), maxComputeWorkGroupInvocations);
68         numInvocationsLeft = numInvocationsLeft / xWorkGroupSize + ((numInvocationsLeft % xWorkGroupSize) ? 1u : 0u);
69
70         const deUint32 yWorkGroupSize = std::min(std::min(numInvocationsLeft, maxComputeWorkGroupSize.y()), maxComputeWorkGroupInvocations / xWorkGroupSize);
71         numInvocationsLeft = numInvocationsLeft / yWorkGroupSize + ((numInvocationsLeft % yWorkGroupSize) ? 1u : 0u);
72
73         const deUint32 zWorkGroupSize = std::min(std::min(numInvocationsLeft, maxComputeWorkGroupSize.z()), maxComputeWorkGroupInvocations / (xWorkGroupSize*yWorkGroupSize));
74         numInvocationsLeft = numInvocationsLeft / zWorkGroupSize + ((numInvocationsLeft % zWorkGroupSize) ? 1u : 0u);
75
76         return tcu::UVec3(xWorkGroupSize, yWorkGroupSize, zWorkGroupSize);
77 }
78
79 class BufferSparseMemoryAliasingCase : public TestCase
80 {
81 public:
82                                         BufferSparseMemoryAliasingCase  (tcu::TestContext&              testCtx,
83                                                                                                          const std::string&             name,
84                                                                                                          const std::string&             description,
85                                                                                                          const deUint32                 bufferSize,
86                                                                                                          const glu::GLSLVersion glslVersion);
87
88         void                    initPrograms                                    (SourceCollections&             sourceCollections) const;
89         TestInstance*   createInstance                                  (Context&                               context) const;
90
91 private:
92         const   deUint32                        m_bufferSizeInBytes;
93         const   glu::GLSLVersion        m_glslVersion;
94 };
95
96 BufferSparseMemoryAliasingCase::BufferSparseMemoryAliasingCase (tcu::TestContext&               testCtx,
97                                                                                                                                 const std::string&              name,
98                                                                                                                                 const std::string&              description,
99                                                                                                                                 const deUint32                  bufferSize,
100                                                                                                                                 const glu::GLSLVersion  glslVersion)
101         : TestCase                              (testCtx, name, description)
102         , m_bufferSizeInBytes   (bufferSize)
103         , m_glslVersion                 (glslVersion)
104 {
105 }
106
107 void BufferSparseMemoryAliasingCase::initPrograms (SourceCollections& sourceCollections) const
108 {
109         // Create compute program
110         const char* const versionDecl           = glu::getGLSLVersionDeclaration(m_glslVersion);
111         const deUint32    numInvocations        = m_bufferSizeInBytes / SIZE_OF_UINT_IN_SHADER;
112         const tcu::UVec3  workGroupSize         = computeWorkGroupSize(numInvocations);
113
114         std::ostringstream src;
115         src << versionDecl << "\n"
116                 << "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y() << ", local_size_z = " << workGroupSize.z() << ") in;\n"
117                 << "layout(set = 0, binding = 0, std430) writeonly buffer Output\n"
118                 << "{\n"
119                 << "    uint result[];\n"
120                 << "} sb_out;\n"
121                 << "\n"
122                 << "void main (void)\n"
123                 << "{\n"
124                 << "    uint index = gl_GlobalInvocationID.x + (gl_GlobalInvocationID.y + gl_GlobalInvocationID.z*gl_NumWorkGroups.y*gl_WorkGroupSize.y)*gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
125                 << "    if ( index < " << m_bufferSizeInBytes / SIZE_OF_UINT_IN_SHADER << "u )\n"
126                 << "    {\n"
127                 << "            sb_out.result[index] = index % " << MODULO_DIVISOR << "u;\n"
128                 << "    }\n"
129                 << "}\n";
130
131         sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
132 }
133
134 class BufferSparseMemoryAliasingInstance : public SparseResourcesBaseInstance
135 {
136 public:
137                                         BufferSparseMemoryAliasingInstance      (Context&                                       context,
138                                                                                                                  const deUint32                         bufferSize);
139
140         tcu::TestStatus iterate                                                         (void);
141
142 private:
143         const deUint32                  m_bufferSizeInBytes;
144 };
145
146 BufferSparseMemoryAliasingInstance::BufferSparseMemoryAliasingInstance (Context&                                        context,
147                                                                                                                                                 const deUint32                  bufferSize)
148         : SparseResourcesBaseInstance   (context)
149         , m_bufferSizeInBytes                   (bufferSize)
150 {
151 }
152
153 tcu::TestStatus BufferSparseMemoryAliasingInstance::iterate (void)
154 {
155         const InstanceInterface&                instance                = m_context.getInstanceInterface();
156         const VkPhysicalDevice                  physicalDevice  = m_context.getPhysicalDevice();
157
158         if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseBinding)
159                 TCU_THROW(NotSupportedError, "Sparse binding not supported");
160
161         if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyAliased)
162                 TCU_THROW(NotSupportedError, "Sparse memory aliasing not supported");
163
164         {
165                 // Create logical device supporting both sparse and compute operations
166                 QueueRequirementsVec queueRequirements;
167                 queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
168                 queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
169
170                 createDeviceSupportingQueues(queueRequirements);
171         }
172
173         const DeviceInterface&  deviceInterface = getDeviceInterface();
174         const Queue&                    sparseQueue             = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
175         const Queue&                    computeQueue    = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
176
177         VkBufferCreateInfo bufferCreateInfo =
178         {
179                 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,   // VkStructureType              sType;
180                 DE_NULL,                                                                // const void*                  pNext;
181                 VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
182                 VK_BUFFER_CREATE_SPARSE_ALIASED_BIT,    // VkBufferCreateFlags  flags;
183                 m_bufferSizeInBytes,                                    // VkDeviceSize                 size;
184                 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
185                 VK_BUFFER_USAGE_TRANSFER_SRC_BIT,               // VkBufferUsageFlags   usage;
186                 VK_SHARING_MODE_EXCLUSIVE,                              // VkSharingMode                sharingMode;
187                 0u,                                                                             // deUint32                             queueFamilyIndexCount;
188                 DE_NULL                                                                 // const deUint32*              pQueueFamilyIndices;
189         };
190
191         const deUint32 queueFamilyIndices[] = { sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex };
192
193         if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
194         {
195                 bufferCreateInfo.sharingMode                    = VK_SHARING_MODE_CONCURRENT;
196                 bufferCreateInfo.queueFamilyIndexCount  = 2u;
197                 bufferCreateInfo.pQueueFamilyIndices    = queueFamilyIndices;
198         }
199
200         // Create sparse buffers
201         const Unique<VkBuffer> sparseBufferWrite(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));
202         const Unique<VkBuffer> sparseBufferRead (createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));
203
204         // Create sparse buffers memory bind semaphore
205         const Unique<VkSemaphore> bufferMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));
206
207         const VkMemoryRequirements      bufferMemRequirements = getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBufferWrite);
208
209         if (bufferMemRequirements.size > getPhysicalDeviceProperties(instance, physicalDevice).limits.sparseAddressSpaceSize)
210                 TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits");
211
212         DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);
213
214         const deUint32 memoryType = findMatchingMemoryType(instance, physicalDevice, bufferMemRequirements, MemoryRequirement::Any);
215
216         if (memoryType == NO_MATCH_FOUND)
217                 return tcu::TestStatus::fail("No matching memory type found");
218
219         const VkSparseMemoryBind sparseMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.size, memoryType, 0u);
220
221         Move<VkDeviceMemory> deviceMemoryPtr(check<VkDeviceMemory>(sparseMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL));
222
223         {
224                 const VkSparseBufferMemoryBindInfo sparseBufferMemoryBindInfo[2] =
225                 {
226                         makeSparseBufferMemoryBindInfo
227                         (*sparseBufferWrite,    //VkBuffer                                      buffer;
228                         1u,                                             //deUint32                                      bindCount;
229                         &sparseMemoryBind               //const VkSparseMemoryBind*     Binds;
230                         ),
231
232                         makeSparseBufferMemoryBindInfo
233                         (*sparseBufferRead,             //VkBuffer                                      buffer;
234                         1u,                                             //deUint32                                      bindCount;
235                         &sparseMemoryBind               //const VkSparseMemoryBind*     Binds;
236                         )
237                 };
238
239                 const VkBindSparseInfo bindSparseInfo =
240                 {
241                         VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                     //VkStructureType                                                       sType;
242                         DE_NULL,                                                                        //const void*                                                           pNext;
243                         0u,                                                                                     //deUint32                                                                      waitSemaphoreCount;
244                         DE_NULL,                                                                        //const VkSemaphore*                                            pWaitSemaphores;
245                         2u,                                                                                     //deUint32                                                                      bufferBindCount;
246                         sparseBufferMemoryBindInfo,                                     //const VkSparseBufferMemoryBindInfo*           pBufferBinds;
247                         0u,                                                                                     //deUint32                                                                      imageOpaqueBindCount;
248                         DE_NULL,                                                                        //const VkSparseImageOpaqueMemoryBindInfo*      pImageOpaqueBinds;
249                         0u,                                                                                     //deUint32                                                                      imageBindCount;
250                         DE_NULL,                                                                        //const VkSparseImageMemoryBindInfo*            pImageBinds;
251                         1u,                                                                                     //deUint32                                                                      signalSemaphoreCount;
252                         &bufferMemoryBindSemaphore.get()                        //const VkSemaphore*                                            pSignalSemaphores;
253                 };
254
255                 // Submit sparse bind commands for execution
256                 VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
257         }
258
259         // Create output buffer
260         const VkBufferCreateInfo                outputBufferCreateInfo  = makeBufferCreateInfo(m_bufferSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
261         const Unique<VkBuffer>                  outputBuffer                    (createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
262         const de::UniquePtr<Allocation> outputBufferAlloc               (bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
263
264         // Create command buffer for compute and data transfer oparations
265         const Unique<VkCommandPool>       commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
266         const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
267
268         // Start recording commands
269         beginCommandBuffer(deviceInterface, *commandBuffer);
270
271         // Create descriptor set
272         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
273                 DescriptorSetLayoutBuilder()
274                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
275                 .build(deviceInterface, getDevice()));
276
277         // Create compute pipeline
278         const Unique<VkShaderModule>    shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
279         const Unique<VkPipelineLayout>  pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
280         const Unique<VkPipeline>                computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
281
282         deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
283
284         // Create descriptor set
285         const Unique<VkDescriptorPool> descriptorPool(
286                 DescriptorPoolBuilder()
287                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
288                 .build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
289
290         const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
291
292         {
293                 const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(*sparseBufferWrite, 0u, m_bufferSizeInBytes);
294
295                 DescriptorSetUpdateBuilder()
296                         .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
297                         .update(deviceInterface, getDevice());
298         }
299
300         deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
301
302         {
303                 deUint32                 numInvocationsLeft = m_bufferSizeInBytes / SIZE_OF_UINT_IN_SHADER;
304                 const tcu::UVec3 workGroupSize = computeWorkGroupSize(numInvocationsLeft);
305                 const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);
306
307                 numInvocationsLeft -= workGroupSize.x()*workGroupSize.y()*workGroupSize.z();
308
309                 const deUint32  xWorkGroupCount = std::min(numInvocationsLeft, maxComputeWorkGroupCount.x());
310                 numInvocationsLeft = numInvocationsLeft / xWorkGroupCount + ((numInvocationsLeft % xWorkGroupCount) ? 1u : 0u);
311                 const deUint32  yWorkGroupCount = std::min(numInvocationsLeft, maxComputeWorkGroupCount.y());
312                 numInvocationsLeft = numInvocationsLeft / yWorkGroupCount + ((numInvocationsLeft % yWorkGroupCount) ? 1u : 0u);
313                 const deUint32  zWorkGroupCount = std::min(numInvocationsLeft, maxComputeWorkGroupCount.z());
314                 numInvocationsLeft = numInvocationsLeft / zWorkGroupCount + ((numInvocationsLeft % zWorkGroupCount) ? 1u : 0u);
315
316                 if (numInvocationsLeft != 1u)
317                         TCU_THROW(NotSupportedError, "Buffer size is not supported");
318
319                 deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
320         }
321
322         {
323                 const VkBufferMemoryBarrier sparseBufferWriteBarrier
324                         = makeBufferMemoryBarrier(      VK_ACCESS_SHADER_WRITE_BIT,
325                                                                                 VK_ACCESS_TRANSFER_READ_BIT,
326                                                                                 *sparseBufferWrite,
327                                                                                 0ull,
328                                                                                 m_bufferSizeInBytes);
329
330                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &sparseBufferWriteBarrier, 0u, DE_NULL);
331         }
332
333         {
334                 const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSizeInBytes);
335
336                 deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBufferRead, *outputBuffer, 1u, &bufferCopy);
337         }
338
339         {
340                 const VkBufferMemoryBarrier outputBufferHostBarrier
341                         = makeBufferMemoryBarrier(      VK_ACCESS_TRANSFER_WRITE_BIT,
342                                                                                 VK_ACCESS_HOST_READ_BIT,
343                                                                                 *outputBuffer,
344                                                                                 0ull,
345                                                                                 m_bufferSizeInBytes);
346
347                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferHostBarrier, 0u, DE_NULL);
348         }
349
350         // End recording commands
351         endCommandBuffer(deviceInterface, *commandBuffer);
352
353         // The stage at which execution is going to wait for finish of sparse binding operations
354         const VkPipelineStageFlags waitStageBits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
355
356         // Submit commands for execution and wait for completion
357         submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &bufferMemoryBindSemaphore.get(), waitStageBits);
358
359         // Retrieve data from output buffer to host memory
360         invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), m_bufferSizeInBytes);
361
362         const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
363
364         // Wait for sparse queue to become idle
365         deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
366
367         // Prepare reference data
368         std::vector<deUint8> referenceData;
369         referenceData.resize(m_bufferSizeInBytes);
370
371         std::vector<deUint32> referenceDataBlock;
372         referenceDataBlock.resize(MODULO_DIVISOR);
373
374         for (deUint32 valueNdx = 0; valueNdx < MODULO_DIVISOR; ++valueNdx)
375         {
376                 referenceDataBlock[valueNdx] = valueNdx % MODULO_DIVISOR;
377         }
378
379         const deUint32 fullBlockSizeInBytes = MODULO_DIVISOR * SIZE_OF_UINT_IN_SHADER;
380         const deUint32 lastBlockSizeInBytes = m_bufferSizeInBytes % fullBlockSizeInBytes;
381         const deUint32 numberOfBlocks           = m_bufferSizeInBytes / fullBlockSizeInBytes + (lastBlockSizeInBytes ? 1u : 0u);
382
383         for (deUint32 blockNdx = 0; blockNdx < numberOfBlocks; ++blockNdx)
384         {
385                 const deUint32 offset = blockNdx * fullBlockSizeInBytes;
386                 deMemcpy(&referenceData[0] + offset, &referenceDataBlock[0], ((offset + fullBlockSizeInBytes) <= m_bufferSizeInBytes) ? fullBlockSizeInBytes : lastBlockSizeInBytes);
387         }
388
389         // Compare reference data with output data
390         if (deMemCmp(&referenceData[0], outputData, m_bufferSizeInBytes) != 0)
391                 return tcu::TestStatus::fail("Failed");
392         else
393                 return tcu::TestStatus::pass("Passed");
394 }
395
396 TestInstance* BufferSparseMemoryAliasingCase::createInstance (Context& context) const
397 {
398         return new BufferSparseMemoryAliasingInstance(context, m_bufferSizeInBytes);
399 }
400
401 } // anonymous ns
402
403 void addBufferSparseMemoryAliasingTests(tcu::TestCaseGroup* group)
404 {
405         group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_10", "", 1 << 10, glu::GLSL_VERSION_440));
406         group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_12", "", 1 << 12, glu::GLSL_VERSION_440));
407         group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_16", "", 1 << 16, glu::GLSL_VERSION_440));
408         group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_17", "", 1 << 17, glu::GLSL_VERSION_440));
409         group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_20", "", 1 << 20, glu::GLSL_VERSION_440));
410         group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_24", "", 1 << 24, glu::GLSL_VERSION_440));
411 }
412
413 } // sparse
414 } // vkt