1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2021 The Khronos Group Inc.
6 * Copyright (c) 2021 Valve Corporation.
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * \brief Test for VK_EXT_multi_draw
23 *//*--------------------------------------------------------------------*/
25 #include "vktDrawMultiExtTests.hpp"
27 #include "vkTypeUtil.hpp"
28 #include "vkImageWithMemory.hpp"
29 #include "vkObjUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkBufferWithMemory.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkBarrierUtil.hpp"
36 #include "tcuTexture.hpp"
37 #include "tcuMaybe.hpp"
38 #include "tcuImageCompare.hpp"
40 #include "deUniquePtr.hpp"
42 #include "deRandom.hpp"
// Normal or indexed draws.
enum class DrawType
{
	NORMAL = 0,
	INDEXED,
};
// How to apply the vertex offset in indexed draws.
enum class VertexOffsetType
{
	MIXED = 0,			// Do not use pVertexOffset and mix values in struct-indicated offsets.
	CONSTANT_RANDOM,	// Use a constant value for pVertexOffset and fill offset struct members with random values.
	CONSTANT_PACK,		// Use a constant value for pVertexOffset and a stride that removes the vertex offset member in structs.
};
// Triangle mesh type: one small triangle per pixel (MOSAIC) or full-screen triangles stacked on top of each other (OVERLAPPING).
enum class MeshType
{
	MOSAIC = 0,
	OVERLAPPING,
};
74 // Vertex offset parameters.
// NOTE(review): the braces and some members of the parameter structures below are not visible in this view; other
// parts of the file read fields such as offset, drawCount, stride and seed, which are presumably declared in the
// missing lines -- confirm against the full file.
75 struct VertexOffsetParams
// Strategy used to apply the vertex offset (see VertexOffsetType).
77 VertexOffsetType offsetType;
// Number of instances to draw and index of the first one, as passed to the multi-draw commands.
87 deUint32 instanceCount;
88 deUint32 firstInstance;
90 tcu::Maybe<VertexOffsetParams> vertexOffset; // Only used for indexed draws.
// Highest instance index used by the draw, i.e. firstInstance + instanceCount - 1.
// A zero instance count is special-cased first: without that check the unsigned subtraction could wrap around.
// NOTE(review): the value returned for the zero-instance case is not visible in this view.
93 deUint32 maxInstanceIndex () const
95 if (instanceCount == 0u)
97 return (firstInstance + instanceCount - 1u);
101 // For the color attachment. Must match what the fragment shader expects.
102 VkFormat getColorFormat ()
104 return VK_FORMAT_R8G8B8A8_UINT;
107 // Compatible with getColorFormat() but better when used with the image logging facilities.
108 VkFormat getVerificationFormat ()
110 return VK_FORMAT_R8G8B8A8_UNORM;
113 // Find a suitable format for the depth/stencil buffer.
114 VkFormat chooseDepthStencilFormat (const InstanceInterface& vki, VkPhysicalDevice physDev)
116 // The spec mandates support for one of these two formats.
117 const VkFormat candidates[] = { VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT };
119 for (const auto& format : candidates)
121 const auto properties = getPhysicalDeviceFormatProperties(vki, physDev, format);
122 if ((properties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) != 0u)
126 TCU_FAIL("No suitable depth/stencil format found");
127 return VK_FORMAT_UNDEFINED; // Unreachable.
130 // Format used when verifying the stencil aspect.
131 VkFormat getStencilVerificationFormat ()
133 return VK_FORMAT_S8_UINT;
136 deUint32 getTriangleCount ()
138 return 1024u; // This matches the minumum allowed limit for maxMultiDrawCount, so we can submit a single triangle per draw call.
141 // Base class for creating triangles.
142 class TriangleGenerator
145 // Append a new triangle for ID (x, y).
146 virtual void appendTriangle(deUint32 x, deUint32 y, std::vector<tcu::Vec4>& vertices) = 0;
149 // Class that helps creating triangle vertices for each framebuffer pixel, forming a mosaic of triangles.
// Each triangle is centered on one pixel and extends a quarter of a pixel from that center in every direction, so
// triangles generated for neighbouring pixels do not touch each other.
// NOTE(review): the member declarations and access specifiers of this class are not visible in this view.
150 class TriangleMosaicGenerator : public TriangleGenerator
153 // Normalized width and height taking into account the framebuffer's width and height are two units (from -1 to 1).
// width and height are the framebuffer dimensions in pixels: 2.0 / dimension is the size of one pixel in the [-1, 1]
// range, and the deltas are a quarter of that size.
161 TriangleMosaicGenerator (deUint32 width, deUint32 height)
162 : m_pixelWidth (2.0f / static_cast<float>(width))
163 , m_pixelHeight (2.0f / static_cast<float>(height))
164 , m_deltaX (m_pixelWidth * 0.25f)
165 , m_deltaY (m_pixelHeight * 0.25f)
168 // Creates a triangle for framebuffer pixel (x, y) around its center. Appends the triangle vertices to the given list.
169 void appendTriangle(deUint32 x, deUint32 y, std::vector<tcu::Vec4>& vertices) override
// Center of pixel (x, y) in normalized coordinates: the +0.5 moves from the pixel corner to its center and the -1
// shifts the [0, 2] range to [-1, 1].
172 const float coordX = (static_cast<float>(x) + 0.5f) * m_pixelWidth - 1.0f;
173 const float coordY = (static_cast<float>(y) + 0.5f) * m_pixelHeight - 1.0f;
175 // Triangle around it.
176 const float topY = coordY - m_deltaY;
177 const float bottomY = coordY + m_deltaY;
179 const float leftX = coordX - m_deltaX;
180 const float rightX = coordX + m_deltaX;
// Vertices are emitted as bottom-left, top-center, bottom-right, all at depth 0.
183 vertices.emplace_back(leftX, bottomY, 0.0f, 1.0f);
184 vertices.emplace_back(coordX, topY, 0.0f, 1.0f);
185 vertices.emplace_back(rightX, bottomY, 0.0f, 1.0f);
189 // Class that helps create full-screen triangles that overlap each other.
190 // This generator will generate width*height full-screen triangles with decreasing depth from 0.75 to 0.25.
// NOTE(review): some member declarations (m_width and m_depthStep are used below) and the first constructor
// initializer are not visible in this view.
191 class TriangleOverlapGenerator : public TriangleGenerator
194 // Normalized width and height taking into account the framebuffer's width and height are two units (from -1 to 1).
196 deUint32 m_totalPixels;
// Depth values assigned to the generated triangles stay within [kMinDepth, kMaxDepth].
199 static constexpr float kMinDepth = 0.25f;
200 static constexpr float kMaxDepth = 0.75f;
201 static constexpr float kDepthRange = kMaxDepth - kMinDepth;
// width and height are the framebuffer dimensions in pixels; one triangle id exists per pixel.
204 TriangleOverlapGenerator (deUint32 width, deUint32 height)
206 , m_totalPixels (width * height)
// Depth decrement between consecutive triangle ids, chosen so that all ids together span kDepthRange.
207 , m_depthStep (kDepthRange / static_cast<float>(m_totalPixels))
210 // Creates full-screen triangle with 2D id (x, y) and decreasing depth with increasing ids.
211 void appendTriangle(deUint32 x, deUint32 y, std::vector<tcu::Vec4>& vertices) override
// Linear id of the triangle in row-major order; id 0 gets kMaxDepth and later ids get smaller depths.
213 const auto pixelId = static_cast<float>(y * m_width + x);
214 const auto depth = kMaxDepth - m_depthStep * pixelId;
// One oversized triangle with corners (-1,-1), (4,-1) and (-1,4): it contains the whole [-1, 1] x [-1, 1] square.
217 vertices.emplace_back(-1.0f, -1.0f, depth, 1.0f);
218 vertices.emplace_back(4.0f, -1.0f, depth, 1.0f);
219 vertices.emplace_back(-1.0f, 4.0f, depth, 1.0f);
223 // Class that helps creating a suitable draw info vector.
// Entries are serialized as VkMultiDrawInfoEXT or VkMultiDrawIndexedInfoEXT structures into a raw byte vector, each
// one followed by m_extraBytes zero bytes, so that consecutive entries can be separated by the requested stride.
// NOTE(review): the class header, some member declarations and some method bodies are not visible in this view.
228 tcu::Maybe<VertexOffsetType> m_offsetType; // Offset type when m_drawType is DrawType::INDEXED.
229 deUint32 m_stride; // Desired stride. Must be zero or at least as big as the needed VkMultiDraw*InfoExt.
230 deUint32 m_extraBytes; // Used to match the desired stride.
231 de::Random m_random; // Used to generate random offsets.
232 deUint32 m_infoCount; // How many infos have we appended so far?
233 std::vector<deUint8> m_dataVec; // Data vector in generic form.
235 // Are draws indexed and using the offset member of VkMultiDrawIndexedInfoEXT?
// NOTE(review): offsetType is dereferenced whenever drawType is INDEXED, so indexed draws must always come with an
// offset type; for non-indexed draws the short-circuit keeps it from being read.
236 static bool indexedWithOffset (DrawType drawType, const tcu::Maybe<VertexOffsetType>& offsetType)
238 return (drawType == DrawType::INDEXED && *offsetType != VertexOffsetType::CONSTANT_PACK);
241 // Size in bytes for the base structure used with the given draw type.
// Indexed draws in CONSTANT_PACK mode use the smaller VkMultiDrawInfoEXT layout, like non-indexed draws.
242 static deUint32 baseSize (DrawType drawType, const tcu::Maybe<VertexOffsetType>& offsetType)
244 return static_cast<deUint32>(indexedWithOffset(drawType, offsetType) ? sizeof(VkMultiDrawIndexedInfoEXT) : sizeof(VkMultiDrawInfoEXT));
247 // Number of extra bytes per entry according to the given stride.
248 static deUint32 calcExtraBytes (DrawType drawType, const tcu::Maybe<VertexOffsetType>& offsetType, deUint32 stride)
250 // Stride 0 is a special allowed case.
// NOTE(review): the handling of the zero-stride case is not visible in this view.
// Any other stride must be able to hold the base structure; the remainder becomes per-entry padding.
254 const auto minStride = baseSize(drawType, offsetType);
255 DE_ASSERT(stride >= minStride);
256 return (stride - minStride);
259 // Entry size in bytes taking into account the number of extra bytes due to stride.
260 deUint32 entrySize () const
262 return baseSize(m_drawType, m_offsetType) + m_extraBytes;
// Parameters: draw type, offset type (for indexed draws), desired stride in bytes, expected number of entries (only
// a capacity hint) and a seed.
// NOTE(review): some initializers are not visible here; the seed presumably initializes m_random -- confirm.
266 DrawInfoPacker (DrawType drawType, const tcu::Maybe<VertexOffsetType>& offsetType, deUint32 stride, deUint32 estimatedInfoCount, deUint32 seed)
267 : m_drawType (drawType)
268 , m_offsetType (offsetType)
270 , m_extraBytes (calcExtraBytes(drawType, offsetType, stride))
275 // estimatedInfoCount is used to avoid excessive reallocation.
276 if (estimatedInfoCount > 0u)
277 m_dataVec.reserve(estimatedInfoCount * entrySize());
// Appends one draw info entry. first and count fill the first two structure members; offset is only used when the
// indexed structure with a vertex offset member is in use.
280 void addDrawInfo (deUint32 first, deUint32 count, deInt32 offset)
// Zero-initialized scratch entry, so padding bytes beyond the structure are zero.
282 std::vector<deUint8> entry(entrySize(), 0);
284 if (indexedWithOffset(m_drawType, m_offsetType))
// In CONSTANT_RANDOM mode the per-entry offset is replaced with a random value instead of the given one.
286 const auto usedOffset = ((*m_offsetType == VertexOffsetType::CONSTANT_RANDOM) ? m_random.getInt32() : offset);
287 const VkMultiDrawIndexedInfoEXT info = { first, count, usedOffset };
288 deMemcpy(entry.data(), &info, sizeof(info));
292 const VkMultiDrawInfoEXT info = { first, count };
293 deMemcpy(entry.data(), &info, sizeof(info));
// Append the whole entry, padding included, to the packed data.
296 std::copy(begin(entry), end(entry), std::back_inserter(m_dataVec));
// Accessors. NOTE(review): the bodies of drawInfoCount() and stride() are not visible in this view.
300 deUint32 drawInfoCount () const
// Raw pointer to the packed entries; callers cast it to the structure type matching the draw type.
305 const void* drawInfoData () const
307 return m_dataVec.data();
310 deUint32 stride () const
// Test case for VK_EXT_multi_draw: provides the shaders, checks for extension support and creates the test instance.
// NOTE(review): braces, access specifiers and data members are not visible in this view; the member functions use an
// m_params member that is presumably declared in the missing lines.
316 class MultiDrawTest : public vkt::TestCase
319 MultiDrawTest (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params);
320 virtual ~MultiDrawTest (void) {}
322 void initPrograms (vk::SourceCollections& programCollection) const override;
323 TestInstance* createInstance (Context& context) const override;
324 void checkSupport (Context& context) const override;
// Test instance that performs the actual drawing and result verification in iterate().
// NOTE(review): braces, access specifiers and data members are not visible in this view.
330 class MultiDrawInstance : public vkt::TestInstance
333 MultiDrawInstance (Context& context, const TestParams& params);
334 virtual ~MultiDrawInstance (void) {}
336 tcu::TestStatus iterate (void) override;
// Constructor: forwards the test context, name and description to the vkt::TestCase base class.
// NOTE(review): the rest of the initializer list and the body are not visible in this view; params is presumably
// stored in m_params, which createInstance() and initPrograms() read -- confirm.
342 MultiDrawTest::MultiDrawTest (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params)
343 : vkt::TestCase (testCtx, name, description)
347 TestInstance* MultiDrawTest::createInstance (Context& context) const
349 return new MultiDrawInstance(context, m_params);
352 void MultiDrawTest::checkSupport (Context& context) const
354 context.requireDeviceFunctionality("VK_EXT_multi_draw");
// Builds the GLSL vertex and fragment shaders and registers them as "vert" and "frag" in the program collection.
// NOTE(review): some lines of this function, including parts of the shader source strings, are not visible in this view.
357 void MultiDrawTest::initPrograms (vk::SourceCollections& programCollection) const
359 // The general idea behind these tests is to have a 32x32 framebuffer with 1024 pixels and 1024 triangles to draw.
361 // When using a mosaic mesh, the tests will generally draw a single triangle around the center of each of these pixels. When
362 // using an overlapping mesh, each single triangle will cover the whole framebuffer using a different depth value, and the depth
363 // test will be enabled.
365 // The color of each triangle will depend on the instance index and the draw index. This way, it's possible to draw those 1024
366 // triangles with a single draw call or to draw each triangle with a separate draw call, with up to 1024 draw calls.
367 // Combinations in between are possible.
369 // With overlapping meshes, the resulting color buffer will be uniform in color. With mosaic meshes, it depends on the submitted
370 // draw count. In some cases, all pixels will be slightly different in color.
372 // The color buffer will be cleared to transparent black when beginning the render pass, and in some special cases some or all
373 // pixels will preserve that clear color because they will not be drawn into. This happens, for example, if the instance count
374 // or draw count is zero and in some cases of meshed geometry with stride zero.
376 // The output color for each pixel will:
377 // - Have the draw index split into the R and G components.
378 // - Have the instance index I stored into the B component as 255-I.
380 // In addition, the tests will use a depth/stencil buffer. The stencil buffer will be cleared to zero and the depth buffer to an
381 // appropriate initial value (0.0 or 1.0, depending on triangle order). The stencil component will be increased with each draw
382 // on each pixel. This will allow us to verify that not only the last draw for the last instance has set the proper color, but
383 // that all draw operations have taken place.
385 // Make sure the blue channel can be calculated without issues.
// The shader computes 255 - gl_InstanceIndex as an unsigned value, so larger instance indices would wrap around.
386 DE_ASSERT(m_params.maxInstanceIndex() <= 255u);
// Vertex shader: passes the position through and encodes the draw index and instance index in the output color.
388 std::ostringstream vert;
392 << "layout (location=0) in vec4 inPos;\n"
393 << "layout (location=0) out uvec4 outColor;\n"
397 << " gl_Position = inPos;\n"
398 << " const uint uDrawIndex = uint(gl_DrawID);\n"
399 << " outColor.r = ((uDrawIndex >> 8u) & 0xFFu);\n"
400 << " outColor.g = ((uDrawIndex ) & 0xFFu);\n"
401 << " outColor.b = 255u - uint(gl_InstanceIndex);\n"
402 << " outColor.a = 255u;\n"
// Fragment shader: writes the (flat, non-interpolated) color received from the vertex shader unchanged.
406 std::ostringstream frag;
410 << "layout (location=0) flat in uvec4 inColor;\n"
411 << "layout (location=0) out uvec4 outColor;\n"
415 << " outColor = inColor;\n"
// These names are the ones used later to fetch the compiled binaries.
419 programCollection.glslSources.add("vert") << glu::VertexSource(vert.str());
420 programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
// Constructor: forwards the context to the vkt::TestInstance base class.
// NOTE(review): the rest of the initializer list and the body are not visible in this view; params is presumably
// stored in m_params, which iterate() reads -- confirm.
423 MultiDrawInstance::MultiDrawInstance (Context& context, const TestParams& params)
424 : vkt::TestInstance (context)
428 void appendPaddingVertices (std::vector<tcu::Vec4>& vertices, deUint32 count)
430 for (deUint32 i = 0u; i < count; ++i)
431 vertices.emplace_back(0.0f, 0.0f, 0.0f, 1.0f);
434 tcu::TestStatus MultiDrawInstance::iterate (void)
436 const auto& vki = m_context.getInstanceInterface();
437 const auto physDev = m_context.getPhysicalDevice();
438 const auto& vkd = m_context.getDeviceInterface();
439 const auto device = m_context.getDevice();
440 auto& alloc = m_context.getDefaultAllocator();
441 const auto queue = m_context.getUniversalQueue();
442 const auto qIndex = m_context.getUniversalQueueFamilyIndex();
444 const auto colorFormat = getColorFormat();
445 const auto dsFormat = chooseDepthStencilFormat(vki, physDev);
446 const auto tcuColorFormat = mapVkFormat(colorFormat);
447 const auto triangleCount = getTriangleCount();
448 const auto imageDim = static_cast<deUint32>(deSqrt(static_cast<double>(triangleCount)));
449 const auto imageExtent = makeExtent3D(imageDim, imageDim, 1u);
450 const auto colorUsage = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
451 const auto dsUsage = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
452 const auto pixelCount = imageExtent.width * imageExtent.height;
453 const auto vertexCount = pixelCount * 3u; // Triangle list.
454 const auto isIndexed = (m_params.drawType == DrawType::INDEXED);
455 const auto isMixedMode = (isIndexed && m_params.vertexOffset && m_params.vertexOffset->offsetType == VertexOffsetType::MIXED);
456 const auto extraVertices = (m_params.vertexOffset ? m_params.vertexOffset->offset : 0u);
457 const auto isMosaic = (m_params.meshType == MeshType::MOSAIC);
459 // Make sure we're providing a vertex offset for indexed cases.
460 DE_ASSERT(!isIndexed || static_cast<bool>(m_params.vertexOffset));
462 // Make sure overlapping draws use a single instance.
463 DE_ASSERT(isMosaic || m_params.instanceCount <= 1u);
466 const VkImageCreateInfo imageCreateInfo =
468 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
469 nullptr, // const void* pNext;
470 0u, // VkImageCreateFlags flags;
471 VK_IMAGE_TYPE_2D, // VkImageType imageType;
472 colorFormat, // VkFormat format;
473 imageExtent, // VkExtent3D extent;
474 1u, // deUint32 mipLevels;
475 1u, // deUint32 arrayLayers;
476 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
477 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
478 colorUsage, // VkImageUsageFlags usage;
479 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
480 0u, // deUint32 queueFamilyIndexCount;
481 nullptr, // const deUint32* pQueueFamilyIndices;
482 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
485 ImageWithMemory colorBuffer (vkd, device, alloc, imageCreateInfo, MemoryRequirement::Any);
486 const auto colorSubresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
487 const auto colorBufferView = makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, colorSubresourceRange);
489 // Depth/stencil buffer.
490 const VkImageCreateInfo dsCreateInfo =
492 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
493 nullptr, // const void* pNext;
494 0u, // VkImageCreateFlags flags;
495 VK_IMAGE_TYPE_2D, // VkImageType imageType;
496 dsFormat, // VkFormat format;
497 imageExtent, // VkExtent3D extent;
498 1u, // deUint32 mipLevels;
499 1u, // deUint32 arrayLayers;
500 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
501 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
502 dsUsage, // VkImageUsageFlags usage;
503 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
504 0u, // deUint32 queueFamilyIndexCount;
505 nullptr, // const deUint32* pQueueFamilyIndices;
506 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
509 ImageWithMemory dsBuffer (vkd, device, alloc, dsCreateInfo, MemoryRequirement::Any);
510 const auto dsSubresourceRange = makeImageSubresourceRange((VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT), 0u, 1u, 0u, 1u);
511 const auto dsBufferView = makeImageView(vkd, device, dsBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, dsFormat, dsSubresourceRange);
513 // Buffer to read color attachment.
514 const auto outputBufferSize = pixelCount * static_cast<VkDeviceSize>(tcu::getPixelSize(tcuColorFormat));
515 const auto bufferCreateInfo = makeBufferCreateInfo(outputBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
516 BufferWithMemory outputBuffer (vkd, device, alloc, bufferCreateInfo, MemoryRequirement::HostVisible);
518 // Buffer to read depth/stencil attachment. Note: this supposes we'll only copy the stencil aspect. See below.
519 const auto tcuStencilFmt = mapVkFormat(getStencilVerificationFormat());
520 const auto stencilOutBufferSize = pixelCount * static_cast<VkDeviceSize>(tcu::getPixelSize(tcuStencilFmt));
521 const auto stencilOutCreateInfo = makeBufferCreateInfo(stencilOutBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
522 BufferWithMemory stencilOutBuffer (vkd, device, alloc, stencilOutCreateInfo, MemoryRequirement::HostVisible);
525 const auto vertModule = createShaderModule(vkd, device, m_context.getBinaryCollection().get("vert"), 0u);
526 const auto fragModule = createShaderModule(vkd, device, m_context.getBinaryCollection().get("frag"), 0u);
528 DescriptorSetLayoutBuilder layoutBuilder;
529 const auto descriptorSetLayout = layoutBuilder.build(vkd, device);
530 const auto pipelineLayout = makePipelineLayout(vkd, device, descriptorSetLayout.get());
533 const auto renderPass = makeRenderPass(vkd, device, colorFormat, dsFormat);
536 const std::vector<VkImageView> attachments { colorBufferView.get(), dsBufferView.get() };
537 const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), static_cast<deUint32>(attachments.size()), de::dataOrNull(attachments), imageExtent.width, imageExtent.height);
539 // Viewports and scissors.
540 const auto viewport = makeViewport(imageExtent);
541 const std::vector<VkViewport> viewports (1u, viewport);
542 const auto scissor = makeRect2D(imageExtent);
543 const std::vector<VkRect2D> scissors (1u, scissor);
545 // Indexed draws will have triangle vertices in reverse order. See index buffer creation below.
546 const auto frontFace = (isIndexed ? VK_FRONT_FACE_COUNTER_CLOCKWISE : VK_FRONT_FACE_CLOCKWISE);
547 const VkPipelineRasterizationStateCreateInfo rasterizationInfo =
549 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
550 nullptr, // const void* pNext;
551 0u, // VkPipelineRasterizationStateCreateFlags flags;
552 VK_FALSE, // VkBool32 depthClampEnable;
553 VK_FALSE, // VkBool32 rasterizerDiscardEnable;
554 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode;
555 VK_CULL_MODE_BACK_BIT, // VkCullModeFlags cullMode;
556 frontFace, // VkFrontFace frontFace;
557 VK_FALSE, // VkBool32 depthBiasEnable;
558 0.0f, // float depthBiasConstantFactor;
559 0.0f, // float depthBiasClamp;
560 0.0f, // float depthBiasSlopeFactor;
561 1.0f, // float lineWidth;
564 const auto frontStencilState = makeStencilOpState(VK_STENCIL_OP_KEEP, VK_STENCIL_OP_INCREMENT_AND_WRAP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS, 0xFFu, 0xFFu, 0u);
565 const auto backStencilState = makeStencilOpState(VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_NEVER, 0xFFu, 0xFFu, 0u);
566 const auto depthTestEnable = (isMosaic ? VK_FALSE : VK_TRUE);
567 const auto depthWriteEnable = depthTestEnable;
568 const auto depthCompareOp = (isMosaic ? VK_COMPARE_OP_ALWAYS : (isIndexed ? VK_COMPARE_OP_GREATER : VK_COMPARE_OP_LESS));
570 const VkPipelineDepthStencilStateCreateInfo depthStencilInfo =
572 VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType;
573 nullptr, // const void* pNext;
574 0u, // VkPipelineDepthStencilStateCreateFlags flags;
575 depthTestEnable, // VkBool32 depthTestEnable;
576 depthWriteEnable, // VkBool32 depthWriteEnable;
577 depthCompareOp, // VkCompareOp depthCompareOp;
578 VK_FALSE, // VkBool32 depthBoundsTestEnable;
579 VK_TRUE, // VkBool32 stencilTestEnable;
580 frontStencilState, // VkStencilOpState front;
581 backStencilState, // VkStencilOpState back;
582 0.0f, // float minDepthBounds;
583 1.0f, // float maxDepthBounds;
587 const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
588 vertModule.get(), DE_NULL, DE_NULL, DE_NULL, fragModule.get(),
589 renderPass.get(), viewports, scissors, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 0u/*subpass*/, 0u/*patchControlPoints*/,
590 nullptr/*vertexInputStateCreateInfo*/, &rasterizationInfo, nullptr/*multisampleStateCreateInfo*/, &depthStencilInfo);
592 // Command pool and buffer.
593 const auto cmdPool = makeCommandPool(vkd, device, qIndex);
594 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
595 const auto cmdBuffer = cmdBufferPtr.get();
597 // Create vertex buffer.
598 std::vector<tcu::Vec4> triangleVertices;
599 triangleVertices.reserve(vertexCount + extraVertices);
601 // Vertex count per draw call.
602 const bool atLeastOneDraw = (m_params.drawCount > 0u);
603 const bool moreThanOneDraw = (m_params.drawCount > 1u);
604 const auto trianglesPerDraw = (atLeastOneDraw ? pixelCount / m_params.drawCount : 0u);
605 const auto verticesPerDraw = trianglesPerDraw * 3u;
608 DE_ASSERT(pixelCount % m_params.drawCount == 0u);
611 using TriangleGeneratorPtr = de::MovePtr<TriangleGenerator>;
612 TriangleGeneratorPtr triangleGen;
614 if (m_params.meshType == MeshType::MOSAIC)
615 triangleGen = TriangleGeneratorPtr(new TriangleMosaicGenerator(imageExtent.width, imageExtent.height));
616 else if (m_params.meshType == MeshType::OVERLAPPING)
617 triangleGen = TriangleGeneratorPtr(new TriangleOverlapGenerator(imageExtent.width, imageExtent.height));
621 // When applying a vertex offset in nonmixed modes, there will be a few extra vertices at the start of the vertex buffer.
622 if (isIndexed && !isMixedMode)
623 appendPaddingVertices(triangleVertices, extraVertices);
625 for (deUint32 y = 0u; y < imageExtent.height; ++y)
626 for (deUint32 x = 0u; x < imageExtent.width; ++x)
628 // When applying a vertex offset in mixed mode, there will be some extra padding between the triangles for the first
629 // block and the rest, so that the vertex offset will not be constant in all draw info structures. This way, the first
630 // triangles will always have offset zero, and the number of them depends on the given draw count.
631 const auto pixelIndex = y * imageExtent.width + x;
632 if (isIndexed && isMixedMode && moreThanOneDraw && pixelIndex == trianglesPerDraw)
633 appendPaddingVertices(triangleVertices, extraVertices);
635 triangleGen->appendTriangle(x, y, triangleVertices);
639 const auto vertexBufferSize = static_cast<VkDeviceSize>(de::dataSize(triangleVertices));
640 const auto vertexBufferInfo = makeBufferCreateInfo(vertexBufferSize, (VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
641 BufferWithMemory vertexBuffer (vkd, device, alloc, vertexBufferInfo, MemoryRequirement::HostVisible);
642 auto& vertexBufferAlloc = vertexBuffer.getAllocation();
643 const auto vertexBufferOffset = vertexBufferAlloc.getOffset();
644 void* vertexBufferData = vertexBufferAlloc.getHostPtr();
646 deMemcpy(vertexBufferData, triangleVertices.data(), de::dataSize(triangleVertices));
647 flushAlloc(vkd, device, vertexBufferAlloc);
649 // Index buffer if needed.
650 de::MovePtr<BufferWithMemory> indexBuffer;
651 VkDeviceSize indexBufferOffset = 0ull;
655 // Indices will be given in reverse order, so they effectively also make the triangles have reverse winding order.
656 std::vector<deUint32> indices;
657 indices.reserve(vertexCount);
658 for (deUint32 i = 0u; i < vertexCount; ++i)
659 indices.push_back(vertexCount - i - 1u);
661 const auto indexBufferSize = static_cast<VkDeviceSize>(de::dataSize(indices));
662 const auto indexBufferInfo = makeBufferCreateInfo(indexBufferSize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
663 indexBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, alloc, indexBufferInfo, MemoryRequirement::HostVisible));
664 auto& indexBufferAlloc = indexBuffer->getAllocation();
665 indexBufferOffset = indexBufferAlloc.getOffset();
666 void* indexBufferData = indexBufferAlloc.getHostPtr();
668 deMemcpy(indexBufferData, indices.data(), de::dataSize(indices));
669 flushAlloc(vkd, device, indexBufferAlloc);
672 beginCommandBuffer(vkd, cmdBuffer);
674 // Transition depth/stencil attachment to the proper initial layout for the render pass.
675 const auto dsPreBarrier = makeImageMemoryBarrier(
677 (VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT),
678 VK_IMAGE_LAYOUT_UNDEFINED,
679 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
680 dsBuffer.get(), dsSubresourceRange);
682 vkd.cmdPipelineBarrier(
684 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
685 (VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT),
686 0u, 0u, nullptr, 0u, nullptr, 1u, &dsPreBarrier);
689 std::vector<VkClearValue> clearValues;
690 clearValues.reserve(2u);
691 clearValues.push_back(makeClearValueColorU32(0u, 0u, 0u, 0u));
692 clearValues.push_back(makeClearValueDepthStencil(((isMosaic || isIndexed) ? 0.0f : 1.0f), 0u));
694 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissor, static_cast<deUint32>(clearValues.size()), de::dataOrNull(clearValues));
696 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
697 vkd.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &vertexBuffer.get(), &vertexBufferOffset);
699 vkd.cmdBindIndexBuffer(cmdBuffer, indexBuffer->get(), indexBufferOffset, VK_INDEX_TYPE_UINT32);
702 const auto offsetType = (m_params.vertexOffset ? m_params.vertexOffset->offsetType : tcu::nothing<VertexOffsetType>());
703 const auto vertexOffset = static_cast<deInt32>(extraVertices);
705 DrawInfoPacker drawInfos(m_params.drawType, offsetType, m_params.stride, m_params.drawCount, m_params.seed);
707 if (m_params.drawCount > 0u)
709 deUint32 vertexIndex = 0u;
710 for (deUint32 drawIdx = 0u; drawIdx < m_params.drawCount; ++drawIdx)
712 // For indexed draws in mixed offset mode, taking into account vertex indices have been stored in reversed order and
713 // there may be a padding in the vertex buffer after the first verticesPerDraw vertices, we need to use offset 0 in the
714 // last draw call. That draw will contain the indices for the first verticesPerDraw vertices, which are stored without
715 // any offset, while other draw calls will use indices which are off by extraVertices vertices. This will make sure not
716 // every draw call will use the same offset and the implementation handles that.
717 const auto drawOffset = ((isIndexed && (!isMixedMode || (moreThanOneDraw && drawIdx < m_params.drawCount - 1u))) ? vertexOffset : 0);
718 drawInfos.addDrawInfo(vertexIndex, verticesPerDraw, drawOffset);
719 vertexIndex += verticesPerDraw;
725 const auto drawInfoPtr = reinterpret_cast<const VkMultiDrawIndexedInfoEXT*>(drawInfos.drawInfoData());
726 const auto offsetPtr = (isMixedMode ? nullptr : &vertexOffset);
727 vkd.cmdDrawMultiIndexedEXT(cmdBuffer, drawInfos.drawInfoCount(), drawInfoPtr, m_params.instanceCount, m_params.firstInstance, drawInfos.stride(), offsetPtr);
731 const auto drawInfoPtr = reinterpret_cast<const VkMultiDrawInfoEXT*>(drawInfos.drawInfoData());
732 vkd.cmdDrawMultiEXT(cmdBuffer, drawInfos.drawInfoCount(), drawInfoPtr, m_params.instanceCount, m_params.firstInstance, drawInfos.stride());
735 endRenderPass(vkd, cmdBuffer);
737 // Prepare images for copying.
738 const auto colorBufferBarrier = makeImageMemoryBarrier(
739 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
740 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
741 colorBuffer.get(), colorSubresourceRange);
742 vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &colorBufferBarrier);
744 const auto dsBufferBarrier = makeImageMemoryBarrier(
745 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
746 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
747 dsBuffer.get(), dsSubresourceRange);
748 vkd.cmdPipelineBarrier(cmdBuffer, (VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT), VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &dsBufferBarrier);
750 // Copy images to output buffers.
751 const auto colorSubresourceLayers = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
752 const auto colorCopyRegion = makeBufferImageCopy(imageExtent, colorSubresourceLayers);
753 vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, outputBuffer.get(), 1u, &colorCopyRegion);
755 // Note: this only copies the stencil aspect. See stencilOutBuffer creation.
756 const auto stencilSubresourceLayers = makeImageSubresourceLayers(VK_IMAGE_ASPECT_STENCIL_BIT, 0u, 0u, 1u);
757 const auto stencilCopyRegion = makeBufferImageCopy(imageExtent, stencilSubresourceLayers);
758 vkd.cmdCopyImageToBuffer(cmdBuffer, dsBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, stencilOutBuffer.get(), 1u, &stencilCopyRegion);
760 // Prepare buffers for host reading.
761 const auto outputBufferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
762 vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &outputBufferBarrier, 0u, nullptr, 0u, nullptr);
764 endCommandBuffer(vkd, cmdBuffer);
765 submitCommandsAndWait(vkd, device, queue, cmdBuffer);
767 // Read output buffers and verify their contents.
768 auto& outputBufferAlloc = outputBuffer.getAllocation();
769 invalidateAlloc(vkd, device, outputBufferAlloc);
770 const void* outputBufferData = outputBufferAlloc.getHostPtr();
772 auto& stencilOutBufferAlloc = stencilOutBuffer.getAllocation();
773 invalidateAlloc(vkd, device, stencilOutBufferAlloc);
774 const void* stencilOutBufferData = stencilOutBufferAlloc.getHostPtr();
776 const auto iWidth = static_cast<int>(imageExtent.width);
777 const auto iHeight = static_cast<int>(imageExtent.height);
779 const auto colorVerificationFormat = mapVkFormat(getVerificationFormat());
780 tcu::ConstPixelBufferAccess colorAccess (colorVerificationFormat, iWidth, iHeight, 1, outputBufferData);
781 tcu::ConstPixelBufferAccess stencilAccess (tcuStencilFmt, iWidth, iHeight, 1, stencilOutBufferData);
783 // Generate reference images.
784 tcu::TextureLevel refColorLevel (colorVerificationFormat, iWidth, iHeight);
785 tcu::PixelBufferAccess refColorAccess = refColorLevel.getAccess();
786 tcu::TextureLevel refStencilLevel (tcuStencilFmt, iWidth, iHeight);
787 tcu::PixelBufferAccess refStencilAccess = refStencilLevel.getAccess();
788 tcu::IVec4 referenceColor;
789 int referenceStencil;
790 const auto maxInstanceIndex = m_params.maxInstanceIndex();
792 // With stride zero, mosaic meshes increment the stencil buffer as many times as draw operations for affected pixels and
793 // overlapping meshes increment the stencil buffer only in the first draw operation (the rest fail the depth test) as many times
794 // as triangles per draw.
796 // With nonzero stride, mosaic meshes increment the stencil buffer once per pixel. Overlapping meshes increment it once per triangle.
798 const auto stencilIncrements = ((m_params.stride == 0u)
799 ? (isMosaic ? drawInfos.drawInfoCount() : trianglesPerDraw)
800 : (isMosaic ? 1u : triangleCount));
802 for (int y = 0; y < iHeight; ++y)
803 for (int x = 0; x < iWidth; ++x)
805 const auto pixelNumber = static_cast<deUint32>(y * iWidth + x);
806 const auto triangleIndex = (isIndexed ? (pixelCount - 1u - pixelNumber) : pixelNumber); // Reverse order for indexed draws.
808 if (m_params.instanceCount == 0u || drawInfos.drawInfoCount() == 0u ||
809 (m_params.stride == 0u && triangleIndex >= trianglesPerDraw && isMosaic))
811 // Some pixels may not be drawn into when there are no instances or draws, or when the stride is zero in mosaic mode.
812 referenceColor = tcu::IVec4(0, 0, 0, 0);
813 referenceStencil = 0;
817 // This must match the vertex shader.
819 // With stride zero, the same block is drawn over and over again in each draw call. This affects both the draw index and
820 // the values in the depth/stencil buffer and, with overlapping meshes, only the first draw passes the depth test.
822 // With nonzero stride, the draw index depends on the triangle index and the number of triangles per draw and, for
823 // overlapping meshes, the draw index is always the last one.
824 const auto drawIndex = (m_params.stride == 0u
825 ? (isMosaic ? (drawInfos.drawInfoCount() - 1u) : 0u)
826 : (isMosaic ? (triangleIndex / trianglesPerDraw) : (drawInfos.drawInfoCount() - 1u)));
827 referenceColor = tcu::IVec4(
828 static_cast<int>((drawIndex >> 8) & 0xFFu),
829 static_cast<int>((drawIndex ) & 0xFFu),
830 static_cast<int>(255u - maxInstanceIndex),
833 referenceStencil = static_cast<int>((m_params.instanceCount * stencilIncrements) % 256u); // VK_STENCIL_OP_INCREMENT_AND_WRAP.
836 refColorAccess.setPixel(referenceColor, x, y);
837 refStencilAccess.setPixStencil(referenceStencil, x, y);
841 auto& log = m_context.getTestContext().getLog();
842 const auto logMode = tcu::CompareLogMode::COMPARE_LOG_ON_ERROR;
844 if (!tcu::intThresholdCompare(log, "ColorTestResult", "", refColorAccess, colorAccess, tcu::UVec4(0u, 0u, 0u, 0u), logMode))
845 return tcu::TestStatus::fail("Color image comparison failed; check log for more details");
847 if (!tcu::dsThresholdCompare(log, "StencilTestResult", "", refStencilAccess, stencilAccess, 0.0f, logMode))
848 return tcu::TestStatus::fail("Stencil image comparison failed; check log for more details");
851 return tcu::TestStatus::pass("Pass");
// Builds the "multi_draw" test group for VK_EXT_multi_draw and returns it as a raw pointer released from its MovePtr.
// Test cases are organized in nested groups, from outermost to innermost: mesh type, draw type (combined with the
// vertex offset type name), draw count, stride and instance configuration.
856 tcu::TestCaseGroup* createDrawMultiExtTests (tcu::TestContext& testCtx)
858 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
860 GroupPtr drawMultiGroup (new tcu::TestCaseGroup(testCtx, "multi_draw", "VK_EXT_multi_draw tests"));
// Mesh type cases and the group name used for each of them.
868 { MeshType::MOSAIC, "mosaic" },
869 { MeshType::OVERLAPPING, "overlapping" },
// Draw type cases and the group name used for each of them.
878 { DrawType::NORMAL, "normal" },
879 { DrawType::INDEXED, "indexed" },
// Vertex offset type cases. The first entry holds no offset type and has an empty name.
884 tcu::Maybe<VertexOffsetType> vertexOffsetType;
886 } offsetTypeCases[] =
888 { tcu::nothing<VertexOffsetType>(), "" },
889 { VertexOffsetType::MIXED, "mixed" },
890 { VertexOffsetType::CONSTANT_RANDOM, "random" },
891 { VertexOffsetType::CONSTANT_PACK, "packed" },
// Draw count case that uses the value returned by getTriangleCount().
903 { getTriangleCount(), "max_draws" },
// Stride cases, expressed as extra bytes that are added to the base structure size when the test stride is computed below.
// NOTE(review): the -1 entry is named "stride_zero" and presumably leaves the stride at its initial value of 0; the
// condition guarding the stride computation is not visible here -- confirm.
912 { -1, "stride_zero" },
913 { 0, "standard_stride" },
914 { 4, "stride_extra_4" },
915 { 12, "stride_extra_12" },
// Instance cases: first instance, instance count and group name, in that order.
920 deUint32 firstInstance;
921 deUint32 instanceCount;
925 { 0u, 0u, "no_instances" },
926 { 0u, 1u, "1_instance" },
927 { 0u, 10u, "10_instances" },
928 { 3u, 2u, "2_instances_base_3" },
// Seed value stored in the parameters of every test case created below.
931 constexpr deUint32 kSeed = 1621260419u;
933 for (const auto& meshTypeCase : meshTypeCases)
935 GroupPtr meshTypeGroup(new tcu::TestCaseGroup(testCtx, meshTypeCase.name, ""));
937 for (const auto& drawTypeCase : drawTypeCases)
939 for (const auto& offsetTypeCase : offsetTypeCases)
// Detect mismatched combinations: normal draws that carry an offset type and indexed draws that lack one.
// NOTE(review): the statement controlled by this condition is not visible here; presumably these combinations are skipped.
941 const auto hasOffsetType = static_cast<bool>(offsetTypeCase.vertexOffsetType);
942 if ((drawTypeCase.drawType == DrawType::NORMAL && hasOffsetType) ||
943 (drawTypeCase.drawType == DrawType::INDEXED && !hasOffsetType))
// The group name starts with the draw type name; "_" plus the offset type name is appended to it.
// NOTE(review): the append is presumably applied only when an offset type is present -- confirm.
948 std::string drawGroupName = drawTypeCase.name;
950 drawGroupName += std::string("_") + offsetTypeCase.name;
952 GroupPtr drawTypeGroup(new tcu::TestCaseGroup(testCtx, drawGroupName.c_str(), ""));
954 for (const auto& drawCountCase : drawCountCases)
956 GroupPtr drawCountGroup(new tcu::TestCaseGroup(testCtx, drawCountCase.name, ""));
958 for (const auto& strideCase : strideCases)
960 GroupPtr strideGroup(new tcu::TestCaseGroup(testCtx, strideCase.name, ""));
962 for (const auto& instanceCase : instanceCases)
964 GroupPtr instanceGroup(new tcu::TestCaseGroup(testCtx, instanceCase.name, ""));
// The base stride is the size of the indexed info structure only for indexed draws that are not packed; every other
// combination uses the size of the non-indexed info structure.
966 const auto isIndexed = (drawTypeCase.drawType == DrawType::INDEXED);
967 const auto isPacked = (offsetTypeCase.vertexOffsetType && *offsetTypeCase.vertexOffsetType == VertexOffsetType::CONSTANT_PACK);
968 const auto baseStride = ((isIndexed && !isPacked) ? sizeof(VkMultiDrawIndexedInfoEXT) : sizeof(VkMultiDrawInfoEXT));
969 const auto& extraBytes = strideCase.extraBytes;
// Indexed draws get vertex offset parameters with the case offset type and a zero offset; other draws get none.
970 const auto testOffset = (isIndexed ? VertexOffsetParams{*offsetTypeCase.vertexOffsetType, 0u } : tcu::nothing<VertexOffsetParams>());
971 deUint32 testStride = 0u;
// Test stride: base structure size plus the extra bytes of the stride case.
974 testStride = static_cast<deUint32>(baseStride) + static_cast<deUint32>(extraBytes);
976 // For overlapping triangles we will skip instanced drawing.
977 if (instanceCase.instanceCount > 1u && meshTypeCase.meshType == MeshType::OVERLAPPING)
// Test parameters for this combination of cases.
982 meshTypeCase.meshType, // MeshType meshType;
983 drawTypeCase.drawType, // DrawType drawType;
984 drawCountCase.drawCount, // deUint32 drawCount;
985 instanceCase.instanceCount, // deUint32 instanceCount;
986 instanceCase.firstInstance, // deUint32 firstInstance;
987 testStride, // deUint32 stride;
988 testOffset, // tcu::Maybe<VertexOffsetParams>> vertexOffset; // Only used for indexed draws.
989 kSeed, // deUint32 seed;
// Variant that uses the parameters exactly as built above.
992 instanceGroup->addChild(new MultiDrawTest(testCtx, "no_offset", "", params));
// Variant with the vertex offset set to 6.
// NOTE(review): this dereferences params.vertexOffset, so it is presumably restricted to indexed draws -- confirm.
996 params.vertexOffset->offset = 6u;
997 instanceGroup->addChild(new MultiDrawTest(testCtx, "offset_6", "", params));
// Each finished group is released from its MovePtr and added as a child of the enclosing group.
1000 strideGroup->addChild(instanceGroup.release());
1003 drawCountGroup->addChild(strideGroup.release());
1006 drawTypeGroup->addChild(drawCountGroup.release());
1009 meshTypeGroup->addChild(drawTypeGroup.release());
1013 drawMultiGroup->addChild(meshTypeGroup.release());
// Release the top-level group to the caller.
1016 return drawMultiGroup.release();