1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Google Inc.
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * \brief Concurrent draw tests
23 * Tests that create a queue for rendering as well as a queue for
24 * compute, trigger work on both pipelines at the same time,
25 * and finally verify that the results are as expected.
26 *//*--------------------------------------------------------------------*/
28 #include "vktDrawConcurrentTests.hpp"
30 #include "vktCustomInstancesDevices.hpp"
31 #include "vktTestCaseUtil.hpp"
32 #include "vktDrawTestCaseUtil.hpp"
33 #include "../compute/vktComputeTestsUtil.hpp"
35 #include "vktDrawBaseClass.hpp"
37 #include "tcuTestLog.hpp"
38 #include "tcuResource.hpp"
39 #include "tcuImageCompare.hpp"
40 #include "tcuTextureUtil.hpp"
41 #include "tcuRGBA.hpp"
44 #include "vkCmdUtil.hpp"
45 #include "vkQueryUtil.hpp"
46 #include "vkBuilderUtil.hpp"
47 #include "vkBarrierUtil.hpp"
48 #include "vkObjUtil.hpp"
49 #include "vkDeviceUtil.hpp"
50 #include "vkSafetyCriticalUtil.hpp"
52 #include "deRandom.hpp"
63 class ConcurrentDraw : public DrawTestsBaseClass
66 typedef TestSpecBase TestSpec;
67 ConcurrentDraw (Context &context, TestSpec testSpec);
68 virtual tcu::TestStatus iterate (void);
// Constructs the test instance: forwards the vertex/fragment shader names, the
// dynamic-rendering flag and the topology from testSpec to DrawTestsBaseClass,
// then fills m_data with the vertices used by the draw.
71 ConcurrentDraw::ConcurrentDraw (Context &context, TestSpec testSpec)
72 : DrawTestsBaseClass(context, testSpec.shaders[glu::SHADERTYPE_VERTEX], testSpec.shaders[glu::SHADERTYPE_FRAGMENT], testSpec.useDynamicRendering, testSpec.topology)
// Two leading vertices, both pushed with -1 as their third argument.
74 m_data.push_back(VertexElementData(tcu::Vec4(1.0f, -1.0f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), -1));
75 m_data.push_back(VertexElementData(tcu::Vec4(-1.0f, 1.0f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), -1));
// Running index handed to every following vertex; starts at 2 because two
// vertices have already been pushed.
77 int refVertexIndex = 2;
// 1000 repetitions of the same six blue vertices. Taken three at a time they
// form two triangles that together cover the square [-0.3, 0.3] x [-0.3, 0.3].
// iterate() draws only 6 vertices starting at vertex 2, i.e. the first repetition.
79 for (int i = 0; i < 1000; i++)
81 m_data.push_back(VertexElementData(tcu::Vec4(-0.3f, -0.3f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), refVertexIndex++));
82 m_data.push_back(VertexElementData(tcu::Vec4(-0.3f, 0.3f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), refVertexIndex++));
83 m_data.push_back(VertexElementData(tcu::Vec4(0.3f, -0.3f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), refVertexIndex++));
84 m_data.push_back(VertexElementData(tcu::Vec4(0.3f, -0.3f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), refVertexIndex++));
85 m_data.push_back(VertexElementData(tcu::Vec4(0.3f, 0.3f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), refVertexIndex++));
86 m_data.push_back(VertexElementData(tcu::Vec4(-0.3f, 0.3f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), refVertexIndex++));
// One trailing vertex, again pushed with -1 as its third argument.
88 m_data.push_back(VertexElementData(tcu::Vec4(-1.0f, 1.0f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), -1));
// Runs the test once:
//   1. creates a custom instance and a separate device exposing one compute queue,
//   2. records a compute dispatch over a host-visible storage buffer,
//   3. records a draw on the context's own device,
//   4. submits both command buffers before waiting on either fence,
//   5. checks the compute output buffer and compares the rendered image
//      against a CPU-built reference frame.
// Returns the resulting tcu::TestStatus; throws NotSupportedError when no
// compute-capable queue family is found.
93 tcu::TestStatus ConcurrentDraw::iterate (void)
// Local constants: NO_MATCH_FOUND (all bits set) marks "no queue family chosen
// yet"; the ERROR_* values record which fence wait failed.
97 NO_MATCH_FOUND = ~((deUint32)0),
99 ERROR_WAIT_COMPUTE = 1,
// Queue family index field of the local Queue record (see computeQueue below).
106 deUint32 queueFamilyIndex;
// Number of deUint32 elements in the compute input/output buffer.
109 const deUint32 numValues = 1024;
// The compute side uses its own instance and a physical device chosen from it,
// rather than the context's instance/physical device (cf. the commented-out lines).
110 const CustomInstance instance (createCustomInstanceFromContext(m_context));
111 const InstanceDriver& instanceDriver (instance.getDriver());
112 const VkPhysicalDevice physicalDevice = chooseDevice(instanceDriver, instance, m_context.getTestContext().getCommandLine());
114 // const InstanceInterface& instance = m_context.getInstanceInterface();
115 // const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
116 const auto validation = m_context.getTestContext().getCommandLine().isValidationEnabled();
117 tcu::TestLog& log = m_context.getTestContext().getLog();
118 Move<VkDevice> computeDevice;
119 std::vector<VkQueueFamilyProperties> queueFamilyProperties;
120 VkDeviceCreateInfo deviceInfo;
121 VkPhysicalDeviceFeatures deviceFeatures;
122 const float queuePriority = 1.0f;
123 VkDeviceQueueCreateInfo queueInfos;
// Starts with a null queue handle and the "not found" family index.
124 Queue computeQueue = { DE_NULL, (deUint32)NO_MATCH_FOUND };
// Select the first queue family that advertises VK_QUEUE_COMPUTE_BIT.
128 queueFamilyProperties = getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);
130 for (deUint32 queueNdx = 0; queueNdx < queueFamilyProperties.size(); ++queueNdx)
132 if (queueFamilyProperties[queueNdx].queueFlags & VK_QUEUE_COMPUTE_BIT)
134 if (computeQueue.queueFamilyIndex == NO_MATCH_FOUND)
135 computeQueue.queueFamilyIndex = queueNdx;
// Without a compute-capable family the test is reported as not supported.
139 if (computeQueue.queueFamilyIndex == NO_MATCH_FOUND)
140 TCU_THROW(NotSupportedError, "Compute queue couldn't be created");
// Request exactly one queue from the selected family.
142 VkDeviceQueueCreateInfo queueInfo;
143 deMemset(&queueInfo, 0, sizeof(queueInfo));
145 queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
146 queueInfo.pNext = DE_NULL;
147 queueInfo.flags = (VkDeviceQueueCreateFlags)0u;
148 queueInfo.queueFamilyIndex = computeQueue.queueFamilyIndex;
149 queueInfo.queueCount = 1;
150 queueInfo.pQueuePriorities = &queuePriority;
152 queueInfos = queueInfo;
154 deMemset(&deviceInfo, 0, sizeof(deviceInfo));
// The features reported by the physical device are passed on unchanged as
// pEnabledFeatures of the new device.
155 instanceDriver.getPhysicalDeviceFeatures(physicalDevice, &deviceFeatures);
// Head of the VkDeviceCreateInfo pNext chain; only the Vulkan SC section
// below prepends structures to it.
157 void* pNext = DE_NULL;
158 #ifdef CTS_USES_VULKANSC
// Vulkan SC builds: chain the object reservation info (taken from the resource
// interface in a subprocess, otherwise a reset default) and the SC 1.0 features.
159 VkDeviceObjectReservationCreateInfo memReservationInfo = m_context.getTestContext().getCommandLine().isSubProcess() ? m_context.getResourceInterface()->getStatMax() : resetDeviceObjectReservationCreateInfo();
160 memReservationInfo.pNext = pNext;
161 pNext = &memReservationInfo;
163 VkPhysicalDeviceVulkanSC10Features sc10Features = createDefaultSC10Features();
164 sc10Features.pNext = pNext;
165 pNext = &sc10Features;
// In a subprocess, hand the recorded pipeline cache data and pipeline pool
// sizes to the reservation info when they are available.
167 VkPipelineCacheCreateInfo pcCI;
168 std::vector<VkPipelinePoolSize> poolSizes;
169 if (m_context.getTestContext().getCommandLine().isSubProcess())
171 if (m_context.getResourceInterface()->getCacheDataSize() > 0)
175 VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
176 DE_NULL, // const void* pNext;
177 VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
178 VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
179 m_context.getResourceInterface()->getCacheDataSize(), // deUintptr initialDataSize;
180 m_context.getResourceInterface()->getCacheData() // const void* pInitialData;
182 memReservationInfo.pipelineCacheCreateInfoCount = 1;
183 memReservationInfo.pPipelineCacheCreateInfos = &pcCI;
186 poolSizes = m_context.getResourceInterface()->getPipelinePoolSizes();
187 if (!poolSizes.empty())
189 memReservationInfo.pipelinePoolSizeCount = deUint32(poolSizes.size());
190 memReservationInfo.pPipelinePoolSizes = poolSizes.data();
193 #endif // CTS_USES_VULKANSC
// Device description: no extensions, no layers, one queue create info.
195 deviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
196 deviceInfo.pNext = pNext;
197 deviceInfo.enabledExtensionCount = 0u;
198 deviceInfo.ppEnabledExtensionNames = DE_NULL;
199 deviceInfo.enabledLayerCount = 0u;
200 deviceInfo.ppEnabledLayerNames = DE_NULL;
201 deviceInfo.pEnabledFeatures = &deviceFeatures;
202 deviceInfo.queueCreateInfoCount = 1;
203 deviceInfo.pQueueCreateInfos = &queueInfos;
205 computeDevice = createCustomDevice(validation, m_context.getPlatformInterface(), instance, instanceDriver, physicalDevice, &deviceInfo);
// Device-level driver built for *computeDevice; the concrete driver type
// depends on whether CTS_USES_VULKANSC is defined.
207 #ifndef CTS_USES_VULKANSC
208 de::MovePtr<vk::DeviceDriver> deviceDriver = de::MovePtr<vk::DeviceDriver>(new vk::DeviceDriver(m_context.getPlatformInterface(), instance, *computeDevice));
210 de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter> deviceDriver = de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(new DeviceDriverSC(m_context.getPlatformInterface(), instance, *computeDevice, m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(), m_context.getDeviceVulkanSC10Properties()), vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), *computeDevice));
211 #endif // CTS_USES_VULKANSC
// From here on `vk` is the interface of the compute device; `m_vk` is the
// interface used for the draw command buffer further down.
212 vk::DeviceInterface& vk = *deviceDriver;
// Fetch queue 0 of the selected family from the compute device.
214 vk.getDeviceQueue(*computeDevice, computeQueue.queueFamilyIndex, 0, &computeQueue.queue);
216 // Create an input/output buffer
217 const VkPhysicalDeviceMemoryProperties memoryProperties = getPhysicalDeviceMemoryProperties(instanceDriver, physicalDevice);
219 de::MovePtr<SimpleAllocator> allocator = de::MovePtr<SimpleAllocator>(new SimpleAllocator(vk, *computeDevice, memoryProperties));
220 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * numValues;
// Host-visible storage buffer holding numValues deUint32 elements.
221 const vkt::compute::Buffer buffer(vk, *computeDevice, *allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
223 // Fill the buffer with data
225 typedef std::vector<deUint32> data_vector_t;
// Host-side copy of the input values; read again during compute validation.
226 data_vector_t inputData(numValues);
// Fixed seed, so the generated sequence is the same on every run.
229 de::Random rnd(0x82ce7f);
230 const Allocation& bufferAllocation = buffer.getAllocation();
231 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
// NOTE(review): only the generation of `val` is visible in this loop; the stores
// into inputData / the mapped buffer are presumably missing from this excerpt — confirm.
233 for (deUint32 i = 0; i < numValues; ++i)
235 deUint32 val = rnd.getUint32();
// Flush the host writes of the mapped allocation.
240 flushAlloc(vk, *computeDevice, bufferAllocation);
243 // Create descriptor set
// Layout, pool and set with a single storage-buffer binding visible to the compute stage.
245 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
246 DescriptorSetLayoutBuilder()
247 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
248 .build(vk, *computeDevice));
250 const Unique<VkDescriptorPool> descriptorPool(
251 DescriptorPoolBuilder()
252 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
253 .build(vk, *computeDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
255 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, *computeDevice, *descriptorPool, *descriptorSetLayout));
// Bind the whole buffer at binding 0.
257 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
258 DescriptorSetUpdateBuilder()
259 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
260 .update(vk, *computeDevice);
262 // Perform the computation
264 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, *computeDevice, m_context.getBinaryCollection().get("vulkan/draw/ConcurrentPayload.comp"), 0u));
266 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, *computeDevice, *descriptorSetLayout));
267 const Unique<VkPipeline> pipeline(makeComputePipeline(vk, *computeDevice, *pipelineLayout, *shaderModule));
// Barrier used before the dispatch: host writes -> shader reads.
268 const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
// Barrier used after the dispatch: shader writes -> host reads.
269 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
270 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, *computeDevice, computeQueue.queueFamilyIndex));
271 const Unique<VkCommandBuffer> computeCommandBuffer(allocateCommandBuffer(vk, *computeDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
273 // Compute command buffer
// Recorded sequence: bind pipeline and descriptor set, host->shader barrier,
// a single 1x1x1 dispatch, shader->host barrier.
275 beginCommandBuffer(vk, *computeCommandBuffer);
276 vk.cmdBindPipeline(*computeCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
277 vk.cmdBindDescriptorSets(*computeCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
278 vk.cmdPipelineBarrier(*computeCommandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
279 vk.cmdDispatch(*computeCommandBuffer, 1, 1, 1);
280 vk.cmdPipelineBarrier(*computeCommandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
281 endCommandBuffer(vk, *computeCommandBuffer);
// Submit info for the compute command buffer: no wait or signal semaphores.
283 const VkSubmitInfo submitInfo =
285 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
287 0u, // waitSemaphoreCount
288 DE_NULL, // pWaitSemaphores
289 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
290 1u, // commandBufferCount
291 &computeCommandBuffer.get(), // pCommandBuffers
292 0u, // signalSemaphoreCount
293 DE_NULL // pSignalSemaphores
// The draw side uses the context's universal queue and device, not computeDevice.
298 const VkQueue drawQueue = m_context.getUniversalQueue();
299 const VkDevice drawDevice = m_context.getDevice();
// Record the draw into m_cmdBuffer through m_vk.
301 beginCommandBuffer(m_vk, *m_cmdBuffer, 0u);
304 const VkDeviceSize vertexBufferOffset = 0;
305 const VkBuffer vertexBuffer = m_vertexBuffer->object();
307 m_vk.cmdBindVertexBuffers(*m_cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
308 m_vk.cmdBindPipeline(*m_cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
// 6 vertices, 1 instance, starting at vertex 2 (the two leading vertices pushed
// by the constructor are skipped).
310 m_vk.cmdDraw(*m_cmdBuffer, 6, 1, 2, 0);
313 endCommandBuffer(m_vk, *m_cmdBuffer);
315 const VkCommandBuffer drawCommandBuffer = m_cmdBuffer.get();
// Device groups are disabled, so deviceGroupSubmitInfo below is never chained
// into drawSubmitInfo (its pNext evaluates to DE_NULL).
316 const bool useDeviceGroups = false;
317 const deUint32 deviceMask = 1u;
// NOTE(review): `vk` was built for *computeDevice, but here it is used with
// drawDevice (from m_context). The same applies to the queueSubmit on drawQueue
// and the waitForFences on drawDevice below. Presumably m_vk is the intended
// interface for the draw device — confirm.
318 const Unique<VkFence> drawFence(createFence(vk, drawDevice));
320 VkDeviceGroupSubmitInfo deviceGroupSubmitInfo =
322 VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO, // VkStructureType sType;
323 DE_NULL, // const void* pNext;
324 0u, // deUint32 waitSemaphoreCount;
325 DE_NULL, // const deUint32* pWaitSemaphoreDeviceIndices;
326 1u, // deUint32 commandBufferCount;
327 &deviceMask, // const deUint32* pCommandBufferDeviceMasks;
328 0u, // deUint32 signalSemaphoreCount;
329 DE_NULL, // const deUint32* pSignalSemaphoreDeviceIndices;
332 const VkSubmitInfo drawSubmitInfo =
334 VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType sType;
335 useDeviceGroups ? &deviceGroupSubmitInfo : DE_NULL, // const void* pNext;
336 0u, // deUint32 waitSemaphoreCount;
337 DE_NULL, // const VkSemaphore* pWaitSemaphores;
338 (const VkPipelineStageFlags*)DE_NULL, // const VkPipelineStageFlags* pWaitDstStageMask;
339 1u, // deUint32 commandBufferCount;
340 &drawCommandBuffer, // const VkCommandBuffer* pCommandBuffers;
341 0u, // deUint32 signalSemaphoreCount;
342 DE_NULL, // const VkSemaphore* pSignalSemaphores;
345 const Unique<VkFence> computeFence(createFence(vk, *computeDevice));
347 // Submit both compute and draw queues
// Both submissions are issued before either fence is waited on.
348 VK_CHECK(vk.queueSubmit(computeQueue.queue, 1u, &submitInfo, *computeFence));
349 VK_CHECK(vk.queueSubmit(drawQueue, 1u, &drawSubmitInfo, *drawFence));
351 int err = ERROR_NONE;
// Wait without timeout (~0ull) on each fence and remember which wait failed.
// If both waits fail, err ends up as ERROR_WAIT_DRAW because the second
// assignment overwrites the first.
353 if (VK_SUCCESS != vk.waitForFences(*computeDevice, 1u, &computeFence.get(), DE_TRUE, ~0ull))
354 err = ERROR_WAIT_COMPUTE;
356 if (VK_SUCCESS != vk.waitForFences(drawDevice, 1u, &drawFence.get(), DE_TRUE, ~0ull))
357 err = ERROR_WAIT_DRAW;
359 // Have to wait for all fences before calling fail, or some fence may be left hanging.
// In Vulkan SC builds the statement that follows this preprocessor section is
// additionally guarded by the subprocess check.
// NOTE(review): the extent of that guarded statement is not visible here — confirm.
362 #ifdef CTS_USES_VULKANSC
363 if (m_context.getTestContext().getCommandLine().isSubProcess())
364 #endif // CTS_USES_VULKANSC
366 if (err == ERROR_WAIT_COMPUTE)
368 return tcu::TestStatus::fail("Failed waiting for compute queue fence.");
371 if (err == ERROR_WAIT_DRAW)
373 return tcu::TestStatus::fail("Failed waiting for draw queue fence.");
376 // Validation - compute
// Invalidate the mapped allocation before reading it on the host.
378 const Allocation& bufferAllocation = buffer.getAllocation();
379 invalidateAlloc(vk, *computeDevice, bufferAllocation);
380 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
// The expected value for each element is the bitwise complement of its input.
382 for (deUint32 ndx = 0; ndx < numValues; ++ndx)
384 const deUint32 res = bufferPtr[ndx];
385 const deUint32 inp = inputData[ndx];
386 const deUint32 ref = ~inp;
// NOTE(review): the condition guarding this failure return (presumably a
// res/ref comparison) is not visible in this excerpt — confirm.
390 std::ostringstream msg;
391 msg << "Comparison failed (compute) for InOut.values[" << ndx << "] ref:" << ref << " res:" << res << " inp:" << inp;
392 return tcu::TestStatus::fail(msg.str());
// Validation - draw: build a WIDTH x HEIGHT reference frame on the CPU.
399 tcu::Texture2D referenceFrame(mapVkFormat(m_colorAttachmentFormat), (int)(0.5f + static_cast<float>(WIDTH)), (int)(0.5f + static_cast<float>(HEIGHT)));
401 referenceFrame.allocLevel(0);
403 const deInt32 frameWidth = referenceFrame.getWidth();
404 const deInt32 frameHeight = referenceFrame.getHeight();
// Background is opaque black.
406 tcu::clear(referenceFrame.getLevel(0), tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f));
408 ReferenceImageCoordinates refCoords;
// Map every pixel to a coordinate starting at -1 (x / (0.5 * size) - 1) and
// paint it opaque blue when it lies inside the refCoords rectangle (bounds inclusive).
410 for (int y = 0; y < frameHeight; y++)
412 const float yCoord = (float)(y / (0.5 * frameHeight)) - 1.0f;
414 for (int x = 0; x < frameWidth; x++)
416 const float xCoord = (float)(x / (0.5 * frameWidth)) - 1.0f;
418 if ((yCoord >= refCoords.bottom &&
419 yCoord <= refCoords.top &&
420 xCoord >= refCoords.left &&
421 xCoord <= refCoords.right))
422 referenceFrame.getLevel(0).setPixel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), x, y);
// Read back the color target through the draw queue, passing VK_IMAGE_LAYOUT_GENERAL as the layout.
426 const VkOffset3D zeroOffset = { 0, 0, 0 };
427 const tcu::ConstPixelBufferAccess renderedFrame = m_colorTargetImage->readSurface(
428 drawQueue, m_context.getDefaultAllocator(), VK_IMAGE_LAYOUT_GENERAL, zeroOffset, WIDTH, HEIGHT, VK_IMAGE_ASPECT_COLOR_BIT);
430 qpTestResult res = QP_TEST_RESULT_PASS;
// Fuzzy image comparison with threshold 0.05; a mismatch turns the result into FAIL.
432 if (!tcu::fuzzyCompare(log, "Result", "Image comparison result",
433 referenceFrame.getLevel(0), renderedFrame, 0.05f,
434 tcu::COMPARE_LOG_RESULT))
436 res = QP_TEST_RESULT_FAIL;
438 return tcu::TestStatus(res, qpGetTestResultName(res));
441 void checkSupport(Context& context, ConcurrentDraw::TestSpec testSpec)
443 if (testSpec.useDynamicRendering)
444 context.requireDeviceFunctionality("VK_KHR_dynamic_rendering");
449 ConcurrentDrawTests::ConcurrentDrawTests (tcu::TestContext &testCtx, bool useDynamicRendering)
450 : TestCaseGroup (testCtx, "concurrent", "concurrent drawing")
451 , m_useDynamicRendering (useDynamicRendering)
453 /* Left blank on purpose */
456 void ConcurrentDrawTests::init (void)
458 ConcurrentDraw::TestSpec testSpec
461 { glu::SHADERTYPE_VERTEX, "vulkan/draw/VertexFetch.vert" },
462 { glu::SHADERTYPE_FRAGMENT, "vulkan/draw/VertexFetch.frag" },
463 { glu::SHADERTYPE_COMPUTE, "vulkan/draw/ConcurrentPayload.comp" }
465 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
466 m_useDynamicRendering
469 addChild(new InstanceFactory<ConcurrentDraw, FunctionSupport1<ConcurrentDraw::TestSpec>>(m_testCtx, "compute_and_triangle_list", "Draws triangle list while running a compute shader", testSpec, FunctionSupport1<ConcurrentDraw::TestSpec>::Args(checkSupport, testSpec)));