Merge pull request #276 from Ella-0/master
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / draw / vktDrawMultiExtTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 The Khronos Group Inc.
6  * Copyright (c) 2021 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Test for VK_EXT_multi_draw
23  *//*--------------------------------------------------------------------*/
24
25 #include "vktDrawMultiExtTests.hpp"
26
27 #include "vkTypeUtil.hpp"
28 #include "vkImageWithMemory.hpp"
29 #include "vkObjUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkBufferWithMemory.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkBarrierUtil.hpp"
35
36 #include "tcuTexture.hpp"
37 #include "tcuMaybe.hpp"
38 #include "tcuImageCompare.hpp"
39
40 #include "deUniquePtr.hpp"
41 #include "deMath.h"
42 #include "deRandom.hpp"
43
44 #include <vector>
45 #include <sstream>
46 #include <algorithm>
47 #include <iterator>
48 #include <limits>
49
50 using namespace vk;
51
52 namespace vkt
53 {
54 namespace Draw
55 {
56
57 namespace
58 {
59
// Kind of draw command being exercised: non-indexed ("normal") or indexed.
enum class DrawType
{
	NORMAL = 0,
	INDEXED,
};
62
// Strategy used to supply the vertex offset in indexed draws.
enum class VertexOffsetType
{
	// Do not use pVertexOffset and mix values in struct-indicated offsets.
	MIXED = 0,

	// Use a constant value for pVertexOffset and fill offset struct members with random values.
	CONSTANT_RANDOM,

	// Use a constant value for pVertexOffset and a stride that removes the vertex offset member in structs.
	CONSTANT_PACK,
};
70
// Layout of the triangle mesh: a mosaic of small triangles or a stack of overlapping ones.
enum class MeshType
{
	MOSAIC = 0,
	OVERLAPPING,
};
73
74 // Vertex offset parameters.
75 struct VertexOffsetParams
76 {
77         VertexOffsetType        offsetType;
78         deUint32                        offset;
79 };
80
81 // Test parameters.
82 struct TestParams
83 {
84         MeshType                                                meshType;
85         DrawType                                                drawType;
86         deUint32                                                drawCount;
87         deUint32                                                instanceCount;
88         deUint32                                                firstInstance;
89         deUint32                                                stride;
90         tcu::Maybe<VertexOffsetParams>  vertexOffset;   // Only used for indexed draws.
91         deUint32                                                seed;
92         bool                                                    useTessellation;
93         bool                                                    useGeometry;
94         bool                                                    multiview;
95
96         deUint32 maxInstanceIndex () const
97         {
98                 if (instanceCount == 0u)
99                         return 0u;
100                 return (firstInstance + instanceCount - 1u);
101         }
102 };
103
104 // For the color attachment. Must match what the fragment shader expects.
105 VkFormat getColorFormat ()
106 {
107         return VK_FORMAT_R8G8B8A8_UINT;
108 }
109
110 // Compatible with getColorFormat() but better when used with the image logging facilities.
111 VkFormat getVerificationFormat ()
112 {
113         return VK_FORMAT_R8G8B8A8_UNORM;
114 }
115
116 // Find a suitable format for the depth/stencil buffer.
117 VkFormat chooseDepthStencilFormat (const InstanceInterface& vki, VkPhysicalDevice physDev)
118 {
119         // The spec mandates support for one of these two formats.
120         const VkFormat candidates[] = { VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT };
121
122         for (const auto& format : candidates)
123         {
124                 const auto properties = getPhysicalDeviceFormatProperties(vki, physDev, format);
125                 if ((properties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) != 0u)
126                         return format;
127         }
128
129         TCU_FAIL("No suitable depth/stencil format found");
130         return VK_FORMAT_UNDEFINED; // Unreachable.
131 }
132
133 // Format used when verifying the stencil aspect.
134 VkFormat getStencilVerificationFormat ()
135 {
136         return VK_FORMAT_S8_UINT;
137 }
138
139 deUint32 getTriangleCount ()
140 {
141         return 1024u;   // This matches the minumum allowed limit for maxMultiDrawCount, so we can submit a single triangle per draw call.
142 }
143
144 // Base class for creating triangles.
145 class TriangleGenerator
146 {
147 public:
148         // Append a new triangle for ID (x, y).
149         virtual void appendTriangle(deUint32 x, deUint32 y, std::vector<tcu::Vec4>& vertices) = 0;
150 };
151
152 // Class that helps creating triangle vertices for each framebuffer pixel, forming a mosaic of triangles.
153 class TriangleMosaicGenerator : public TriangleGenerator
154 {
155 private:
156         // Normalized width and height taking into account the framebuffer's width and height are two units (from -1 to 1).
157         float   m_pixelWidth;
158         float   m_pixelHeight;
159
160         float   m_deltaX;
161         float   m_deltaY;
162
163 public:
164         TriangleMosaicGenerator (deUint32 width, deUint32 height)
165                 : m_pixelWidth  (2.0f / static_cast<float>(width))
166                 , m_pixelHeight (2.0f / static_cast<float>(height))
167                 , m_deltaX              (m_pixelWidth * 0.25f)
168                 , m_deltaY              (m_pixelHeight * 0.25f)
169         {}
170
171         // Creates a triangle for framebuffer pixel (x, y) around its center. Appends the triangle vertices to the given list.
172         void appendTriangle(deUint32 x, deUint32 y, std::vector<tcu::Vec4>& vertices) override
173         {
174                 // Pixel center.
175                 const float coordX      = (static_cast<float>(x) + 0.5f) * m_pixelWidth - 1.0f;
176                 const float coordY      = (static_cast<float>(y) + 0.5f) * m_pixelHeight - 1.0f;
177
178                 // Triangle around it.
179                 const float topY        = coordY - m_deltaY;
180                 const float bottomY     = coordY + m_deltaY;
181
182                 const float leftX       = coordX - m_deltaX;
183                 const float rightX      = coordX + m_deltaX;
184
185                 // Note: clockwise.
186                 vertices.emplace_back(leftX,    bottomY,        0.0f, 1.0f);
187                 vertices.emplace_back(coordX,   topY,           0.0f, 1.0f);
188                 vertices.emplace_back(rightX,   bottomY,        0.0f, 1.0f);
189         }
190 };
191
192 // Class that helps create full-screen triangles that overlap each other.
193 // This generator will generate width*height full-screen triangles with decreasing depth from 0.75 to 0.25.
194 class TriangleOverlapGenerator : public TriangleGenerator
195 {
196 private:
197         // Normalized width and height taking into account the framebuffer's width and height are two units (from -1 to 1).
198         deUint32        m_width;
199         deUint32        m_totalPixels;
200         float           m_depthStep;
201
202         static constexpr float kMinDepth        = 0.25f;
203         static constexpr float kMaxDepth        = 0.75f;
204         static constexpr float kDepthRange      = kMaxDepth - kMinDepth;
205
206 public:
207         TriangleOverlapGenerator (deUint32 width, deUint32 height)
208                 : m_width               (width)
209                 , m_totalPixels (width * height)
210                 , m_depthStep   (kDepthRange / static_cast<float>(m_totalPixels))
211         {}
212
213         // Creates full-screen triangle with 2D id (x, y) and decreasing depth with increasing ids.
214         void appendTriangle(deUint32 x, deUint32 y, std::vector<tcu::Vec4>& vertices) override
215         {
216                 const auto pixelId      = static_cast<float>(y * m_width + x);
217                 const auto depth        = kMaxDepth - m_depthStep * pixelId;
218
219                 // Note: clockwise.
220                 vertices.emplace_back(-1.0f,    -1.0f,  depth, 1.0f);
221                 vertices.emplace_back(4.0f,             -1.0f,  depth, 1.0f);
222                 vertices.emplace_back(-1.0f,    4.0f,   depth, 1.0f);
223         }
224 };
225
226 // Class that helps creating a suitable draw info vector.
227 class DrawInfoPacker
228 {
229 private:
230         DrawType                                                m_drawType;
231         tcu::Maybe<VertexOffsetType>    m_offsetType;   // Offset type when m_drawType is DrawType::INDEXED.
232         deUint32                                                m_stride;               // Desired stride. Must be zero or at least as big as the needed VkMultiDraw*InfoExt.
233         deUint32                                                m_extraBytes;   // Used to match the desired stride.
234         de::Random                                              m_random;               // Used to generate random offsets.
235         deUint32                                                m_infoCount;    // How many infos have we appended so far?
236         std::vector<deUint8>                    m_dataVec;              // Data vector in generic form.
237
238         // Are draws indexed and using the offset member of VkMultiDrawIndexedInfoEXT?
239         static bool indexedWithOffset (DrawType drawType, const tcu::Maybe<VertexOffsetType>& offsetType)
240         {
241                 return (drawType == DrawType::INDEXED && *offsetType != VertexOffsetType::CONSTANT_PACK);
242         }
243
244         // Size in bytes for the base structure used with the given draw type.
245         static deUint32 baseSize (DrawType drawType, const tcu::Maybe<VertexOffsetType>& offsetType)
246         {
247                 return static_cast<deUint32>(indexedWithOffset(drawType, offsetType) ? sizeof(VkMultiDrawIndexedInfoEXT) : sizeof(VkMultiDrawInfoEXT));
248         }
249
250         // Number of extra bytes per entry according to the given stride.
251         static deUint32 calcExtraBytes (DrawType drawType, const tcu::Maybe<VertexOffsetType>& offsetType, deUint32 stride)
252         {
253                 // Stride 0 is a special allowed case.
254                 if (stride == 0u)
255                         return 0u;
256
257                 const auto minStride = baseSize(drawType, offsetType);
258                 DE_ASSERT(stride >= minStride);
259                 return (stride - minStride);
260         }
261
262         // Entry size in bytes taking into account the number of extra bytes due to stride.
263         deUint32 entrySize () const
264         {
265                 return baseSize(m_drawType, m_offsetType) + m_extraBytes;
266         }
267
268 public:
269         DrawInfoPacker  (DrawType drawType, const tcu::Maybe<VertexOffsetType>& offsetType, deUint32 stride, deUint32 estimatedInfoCount, deUint32 seed)
270                 : m_drawType    (drawType)
271                 , m_offsetType  (offsetType)
272                 , m_stride              (stride)
273                 , m_extraBytes  (calcExtraBytes(drawType, offsetType, stride))
274                 , m_random              (seed)
275                 , m_infoCount   (0u)
276                 , m_dataVec             ()
277         {
278                 // estimatedInfoCount is used to avoid excessive reallocation.
279                 if (estimatedInfoCount > 0u)
280                         m_dataVec.reserve(estimatedInfoCount * entrySize());
281         }
282
283         void addDrawInfo (deUint32 first, deUint32 count, deInt32 offset)
284         {
285                 std::vector<deUint8> entry(entrySize(), 0);
286
287                 if (indexedWithOffset(m_drawType, m_offsetType))
288                 {
289                         const auto usedOffset = ((*m_offsetType == VertexOffsetType::CONSTANT_RANDOM) ? m_random.getInt32() : offset);
290                         const VkMultiDrawIndexedInfoEXT info = { first, count, usedOffset };
291                         deMemcpy(entry.data(), &info, sizeof(info));
292                 }
293                 else
294                 {
295                         const VkMultiDrawInfoEXT info = { first, count };
296                         deMemcpy(entry.data(), &info, sizeof(info));
297                 }
298
299                 std::copy(begin(entry), end(entry), std::back_inserter(m_dataVec));
300                 ++m_infoCount;
301         }
302
303         deUint32 drawInfoCount () const
304         {
305                 return m_infoCount;
306         }
307
308         const void* drawInfoData () const
309         {
310                 return m_dataVec.data();
311         }
312
313         deUint32 stride () const
314         {
315                 return m_stride;
316         }
317 };
318
319 class MultiDrawTest : public vkt::TestCase
320 {
321 public:
322                                         MultiDrawTest   (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params);
323         virtual                 ~MultiDrawTest  (void) {}
324
325         void                    initPrograms    (vk::SourceCollections& programCollection) const override;
326         TestInstance*   createInstance  (Context& context) const override;
327         void                    checkSupport    (Context& context) const override;
328
329 private:
330         TestParams              m_params;
331 };
332
333 class MultiDrawInstance : public vkt::TestInstance
334 {
335 public:
336                                                 MultiDrawInstance       (Context& context, const TestParams& params);
337         virtual                         ~MultiDrawInstance      (void) {}
338
339         tcu::TestStatus         iterate                         (void) override;
340
341 private:
342         TestParams                      m_params;
343 };
344
345 MultiDrawTest::MultiDrawTest (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params)
346         : vkt::TestCase (testCtx, name, description)
347         , m_params              (params)
348 {}
349
350 TestInstance* MultiDrawTest::createInstance (Context& context) const
351 {
352         return new MultiDrawInstance(context, m_params);
353 }
354
355 void MultiDrawTest::checkSupport (Context& context) const
356 {
357         context.requireDeviceFunctionality("VK_EXT_multi_draw");
358
359         if (m_params.useTessellation)
360                 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_TESSELLATION_SHADER);
361
362         if (m_params.useGeometry)
363                 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
364
365         if (m_params.multiview)
366         {
367                 const auto& multiviewFeatures = context.getMultiviewFeatures();
368
369                 if (!multiviewFeatures.multiview)
370                         TCU_THROW(NotSupportedError, "Multiview not supported");
371
372                 if (m_params.useTessellation && !multiviewFeatures.multiviewTessellationShader)
373                         TCU_THROW(NotSupportedError, "Multiview not supported with tesellation shaders");
374
375                 if (m_params.useGeometry && !multiviewFeatures.multiviewGeometryShader)
376                         TCU_THROW(NotSupportedError, "Multiview not supported with geometry shaders");
377         }
378 }
379
380 void MultiDrawTest::initPrograms (vk::SourceCollections& programCollection) const
381 {
382         // The general idea behind these tests is to have a 32x32 framebuffer with 1024 pixels and 1024 triangles to draw.
383         //
384         // When using a mosaic mesh, the tests will generally draw a single triangle around the center of each of these pixels. When
385         // using an overlapping mesh, each single triangle will cover the whole framebuffer using a different depth value, and the depth
386         // test will be enabled.
387         //
388         // The color of each triangle will depend on the instance index, the draw index and, when using multiview, the view index. This
389         // way, it's possible to draw those 1024 triangles with a single draw call or to draw each triangle with a separate draw call,
390         // with up to 1024 draw calls. Combinations in between are possible.
391         //
392         // With overlapping meshes, the resulting color buffer will be uniform in color. With mosaic meshes, it depends on the submitted
393         // draw count. In some cases, all pixels will be slightly different in color.
394         //
395         // The color buffer will be cleared to transparent black when beginning the render pass, and in some special cases some or all
396         // pixels will preserve that clear color because they will not be drawn into. This happens, for example, if the instance count
397         // or draw count is zero and in some cases of meshed geometry with stride zero.
398         //
399         // The output color for each pixel will:
400         // - Have the draw index split into the R and G components.
401         // - Have the instance index I stored into the B component as 255-I.
402         //
403         // In addition, the tests will use a depth/stencil buffer. The stencil buffer will be cleared to zero and the depth buffer to an
404         // appropriate initial value (0.0 or 1.0, depending on triangle order). The stencil component will be increased with each draw
405         // on each pixel. This will allow us to verify that not only the last draw for the last instance has set the proper color, but
406         // that all draw operations have taken place.
407
408         // Make sure the blue channel can be calculated without issues.
409         DE_ASSERT(m_params.maxInstanceIndex() <= 255u);
410
411         std::ostringstream vert;
412         vert
413                 << "#version 460\n"
414                 << (m_params.multiview ? "#extension GL_EXT_multiview : enable\n" : "")
415                 << "\n"
416                 << "out gl_PerVertex\n"
417                 << "{\n"
418                 << "    vec4 gl_Position;\n"
419                 << "};\n"
420                 << "\n"
421                 << "layout (location=0) in vec4 inPos;\n"
422                 << "layout (location=0) out uvec4 outColor;\n"
423                 << "\n"
424                 << "void main()\n"
425                 << "{\n"
426                 << "    gl_Position = inPos;\n"
427                 << "    const uint uDrawIndex = uint(gl_DrawID);\n"
428                 << "    outColor.r = ((uDrawIndex >> 8u) & 0xFFu);\n"
429                 << "    outColor.g = ((uDrawIndex      ) & 0xFFu);\n"
430                 << "    outColor.b = 255u - uint(gl_InstanceIndex);\n"
431                 << "    outColor.a = 255u" << (m_params.multiview ? " - uint(gl_ViewIndex)" : "") << ";\n"
432                 << "}\n"
433                 ;
434         programCollection.glslSources.add("vert") << glu::VertexSource(vert.str());
435
436         std::ostringstream frag;
437         frag
438                 << "#version 460\n"
439                 << "\n"
440                 << "layout (location=0) flat in uvec4 inColor;\n"
441                 << "layout (location=0) out uvec4 outColor;\n"
442                 << "\n"
443                 << "void main ()\n"
444                 << "{\n"
445                 << "    outColor = inColor;\n"
446                 << "}\n"
447                 ;
448         programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
449
450         if (m_params.useTessellation)
451         {
452                 std::ostringstream tesc;
453                 tesc
454                         << "#version 460\n"
455                         << "\n"
456                         << "layout (vertices=3) out;\n"
457                         << "in gl_PerVertex\n"
458                         << "{\n"
459                         << "    vec4 gl_Position;\n"
460                         << "} gl_in[gl_MaxPatchVertices];\n"
461                         << "out gl_PerVertex\n"
462                         << "{\n"
463                         << "    vec4 gl_Position;\n"
464                         << "} gl_out[];\n"
465                         << "\n"
466                         << "layout (location=0) in uvec4 inColor[gl_MaxPatchVertices];\n"
467                         << "layout (location=0) out uvec4 outColor[];\n"
468                         << "\n"
469                         << "void main (void)\n"
470                         << "{\n"
471                         << "    gl_TessLevelInner[0] = 1.0;\n"
472                         << "    gl_TessLevelInner[1] = 1.0;\n"
473                         << "    gl_TessLevelOuter[0] = 1.0;\n"
474                         << "    gl_TessLevelOuter[1] = 1.0;\n"
475                         << "    gl_TessLevelOuter[2] = 1.0;\n"
476                         << "    gl_TessLevelOuter[3] = 1.0;\n"
477                         << "    gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
478                         << "    outColor[gl_InvocationID] = inColor[gl_InvocationID];\n"
479                         << "}\n"
480                         ;
481                 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc.str());
482
483                 std::ostringstream tese;
484                 tese
485                         << "#version 460\n"
486                         << "\n"
487                         << "layout (triangles, fractional_odd_spacing, cw) in;\n"
488                         << "in gl_PerVertex\n"
489                         << "{\n"
490                         << "    vec4 gl_Position;\n"
491                         << "} gl_in[gl_MaxPatchVertices];\n"
492                         << "out gl_PerVertex\n"
493                         << "{\n"
494                         << "    vec4 gl_Position;\n"
495                         << "};\n"
496                         << "\n"
497                         << "layout (location=0) in uvec4 inColor[gl_MaxPatchVertices];\n"
498                         << "layout (location=0) out uvec4 outColor;\n"
499                         << "\n"
500                         << "void main (void)\n"
501                         << "{\n"
502                         << "    gl_Position = (gl_TessCoord.x * gl_in[0].gl_Position) +\n"
503                         << "                  (gl_TessCoord.y * gl_in[1].gl_Position) +\n"
504                         << "                  (gl_TessCoord.z * gl_in[2].gl_Position);\n"
505                         << "    outColor = inColor[0];\n"
506                         << "}\n"
507                         ;
508                 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese.str());
509         }
510
511         if (m_params.useGeometry)
512         {
513                 std::ostringstream geom;
514                 geom
515                         << "#version 460\n"
516                         << "\n"
517                         << "layout (triangles) in;\n"
518                         << "layout (triangle_strip, max_vertices=3) out;\n"
519                         << "in gl_PerVertex\n"
520                         << "{\n"
521                         << "    vec4 gl_Position;\n"
522                         << "} gl_in[3];\n"
523                         << "out gl_PerVertex\n"
524                         << "{\n"
525                         << "    vec4 gl_Position;\n"
526                         << "};\n"
527                         << "\n"
528                         << "layout (location=0) in uvec4 inColor[3];\n"
529                         << "layout (location=0) out uvec4 outColor;\n"
530                         << "\n"
531                         << "void main ()\n"
532                         << "{\n"
533                         << "    gl_Position = gl_in[0].gl_Position; outColor = inColor[0]; EmitVertex();\n"
534                         << "    gl_Position = gl_in[1].gl_Position; outColor = inColor[1]; EmitVertex();\n"
535                         << "    gl_Position = gl_in[2].gl_Position; outColor = inColor[2]; EmitVertex();\n"
536                         << "}\n"
537                         ;
538                 programCollection.glslSources.add("geom") << glu::GeometrySource(geom.str());
539         }
540 }
541
542 MultiDrawInstance::MultiDrawInstance (Context& context, const TestParams& params)
543         : vkt::TestInstance     (context)
544         , m_params                      (params)
545 {}
546
547 void appendPaddingVertices (std::vector<tcu::Vec4>& vertices, deUint32 count)
548 {
549         for (deUint32 i = 0u; i < count; ++i)
550                 vertices.emplace_back(0.0f, 0.0f, 0.0f, 1.0f);
551 }
552
553 // Creates a render pass with multiple subpasses, one per layer.
554 Move<VkRenderPass> makeMultidrawRenderPass (const DeviceInterface&      vk,
555                                                                                         VkDevice                                device,
556                                                                                         VkFormat                                colorFormat,
557                                                                                         VkFormat                                depthStencilFormat,
558                                                                                         deUint32                                layerCount)
559 {
560         const VkAttachmentDescription colorAttachmentDescription =
561         {
562                 0u,                                                                                     // VkAttachmentDescriptionFlags    flags
563                 colorFormat,                                                            // VkFormat                        format
564                 VK_SAMPLE_COUNT_1_BIT,                                          // VkSampleCountFlagBits           samples
565                 VK_ATTACHMENT_LOAD_OP_CLEAR,                            // VkAttachmentLoadOp              loadOp
566                 VK_ATTACHMENT_STORE_OP_STORE,                           // VkAttachmentStoreOp             storeOp
567                 VK_ATTACHMENT_LOAD_OP_DONT_CARE,                        // VkAttachmentLoadOp              stencilLoadOp
568                 VK_ATTACHMENT_STORE_OP_DONT_CARE,                       // VkAttachmentStoreOp             stencilStoreOp
569                 VK_IMAGE_LAYOUT_UNDEFINED,                                      // VkImageLayout                   initialLayout
570                 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,       // VkImageLayout                   finalLayout
571         };
572
573         const VkAttachmentDescription depthStencilAttachmentDescription =
574         {
575                 0u,                                                                                                     // VkAttachmentDescriptionFlags    flags
576                 depthStencilFormat,                                                                     // VkFormat                        format
577                 VK_SAMPLE_COUNT_1_BIT,                                                          // VkSampleCountFlagBits           samples
578                 VK_ATTACHMENT_LOAD_OP_CLEAR,                                            // VkAttachmentLoadOp              loadOp
579                 VK_ATTACHMENT_STORE_OP_STORE,                                           // VkAttachmentStoreOp             storeOp
580                 VK_ATTACHMENT_LOAD_OP_CLEAR,                                            // VkAttachmentLoadOp              stencilLoadOp
581                 VK_ATTACHMENT_STORE_OP_STORE,                                           // VkAttachmentStoreOp             stencilStoreOp
582                 VK_IMAGE_LAYOUT_UNDEFINED,                                                      // VkImageLayout                   initialLayout
583                 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,       // VkImageLayout                   finalLayout
584         };
585
586         const std::vector<VkAttachmentDescription>      attachmentDescriptions          = { colorAttachmentDescription, depthStencilAttachmentDescription };
587         const VkAttachmentReference                                     colorAttachmentRef                      = makeAttachmentReference(0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
588         const VkAttachmentReference                                     depthStencilAttachmentRef       = makeAttachmentReference(1u, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
589
590         const VkSubpassDescription subpassDescription =
591         {
592                 0u,                                                                     // VkSubpassDescriptionFlags       flags
593                 VK_PIPELINE_BIND_POINT_GRAPHICS,        // VkPipelineBindPoint             pipelineBindPoint
594                 0u,                                                                     // deUint32                        inputAttachmentCount
595                 nullptr,                                                        // const VkAttachmentReference*    pInputAttachments
596                 1u,                                                                     // deUint32                        colorAttachmentCount
597                 &colorAttachmentRef,                            // const VkAttachmentReference*    pColorAttachments
598                 nullptr,                                                        // const VkAttachmentReference*    pResolveAttachments
599                 &depthStencilAttachmentRef,                     // const VkAttachmentReference*    pDepthStencilAttachment
600                 0u,                                                                     // deUint32                        preserveAttachmentCount
601                 nullptr                                                         // const deUint32*                 pPreserveAttachments
602         };
603
604         std::vector<VkSubpassDescription> subpassDescriptions;
605
606         subpassDescriptions.reserve(layerCount);
607         for (deUint32 subpassIdx = 0u; subpassIdx < layerCount; ++subpassIdx)
608                 subpassDescriptions.push_back(subpassDescription);
609
610         using MultiviewInfoPtr = de::MovePtr<VkRenderPassMultiviewCreateInfo>;
611
612         MultiviewInfoPtr multiviewCreateInfo;
613         std::vector<deUint32> viewMasks;
614
615         if (layerCount > 1u)
616         {
617                 multiviewCreateInfo             = MultiviewInfoPtr(new VkRenderPassMultiviewCreateInfo);
618                 *multiviewCreateInfo    = initVulkanStructure();
619
620                 viewMasks.resize(subpassDescriptions.size());
621                 for (deUint32 subpassIdx = 0u; subpassIdx < static_cast<deUint32>(viewMasks.size()); ++subpassIdx)
622                         viewMasks[subpassIdx] = (1u << subpassIdx);
623
624                 multiviewCreateInfo->subpassCount       = static_cast<deUint32>(viewMasks.size());
625                 multiviewCreateInfo->pViewMasks         = de::dataOrNull(viewMasks);
626         }
627
628         // Dependencies between subpasses for color and depth/stencil read/writes.
629         std::vector<VkSubpassDependency> dependencies;
630         if (layerCount > 1u)
631                 dependencies.reserve((layerCount - 1u) * 2u);
632
633         const auto fragmentTestStages   = (VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT);
634         const auto dsWrites                             = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
635         const auto dsReadWrites                 = (VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT);
636         const auto colorStage                   = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
637         const auto colorWrites                  = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
638         const auto colorReadWrites              = (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
639
640         for (deUint32 subpassIdx = 1u; subpassIdx < layerCount; ++subpassIdx)
641         {
642                 const auto prev = subpassIdx - 1u;
643
644                 const VkSubpassDependency dsDep =
645                 {
646                         prev,                                                   //      deUint32                                srcSubpass;
647                         subpassIdx,                                             //      deUint32                                dstSubpass;
648                         fragmentTestStages,                             //      VkPipelineStageFlags    srcStageMask;
649                         fragmentTestStages,                             //      VkPipelineStageFlags    dstStageMask;
650                         dsWrites,                                               //      VkAccessFlags                   srcAccessMask;
651                         dsReadWrites,                                   //      VkAccessFlags                   dstAccessMask;
652                         VK_DEPENDENCY_BY_REGION_BIT,    //      VkDependencyFlags               dependencyFlags;
653                 };
654                 dependencies.push_back(dsDep);
655
656                 const VkSubpassDependency colorDep =
657                 {
658                         prev,                                                   //      deUint32                                srcSubpass;
659                         subpassIdx,                                             //      deUint32                                dstSubpass;
660                         colorStage,                                             //      VkPipelineStageFlags    srcStageMask;
661                         colorStage,                                             //      VkPipelineStageFlags    dstStageMask;
662                         colorWrites,                                    //      VkAccessFlags                   srcAccessMask;
663                         colorReadWrites,                                //      VkAccessFlags                   dstAccessMask;
664                         VK_DEPENDENCY_BY_REGION_BIT,    //      VkDependencyFlags               dependencyFlags;
665                 };
666                 dependencies.push_back(colorDep);
667         }
668
669         const VkRenderPassCreateInfo renderPassInfo =
670         {
671                 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,                              // VkStructureType                   sType
672                 multiviewCreateInfo.get(),                                                              // const void*                       pNext
673                 0u,                                                                                                             // VkRenderPassCreateFlags           flags
674                 static_cast<deUint32>(attachmentDescriptions.size()),   // deUint32                          attachmentCount
675                 de::dataOrNull(attachmentDescriptions),                                 // const VkAttachmentDescription*    pAttachments
676                 static_cast<deUint32>(subpassDescriptions.size()),              // deUint32                          subpassCount
677                 de::dataOrNull(subpassDescriptions),                                    // const VkSubpassDescription*       pSubpasses
678                 static_cast<deUint32>(dependencies.size()),                             // deUint32                          dependencyCount
679                 de::dataOrNull(dependencies),                                                   // const VkSubpassDependency*        pDependencies
680         };
681
682         return createRenderPass(vk, device, &renderPassInfo, nullptr);
683 }
684
685 tcu::TestStatus MultiDrawInstance::iterate (void)
686 {
687         const auto&     vki                             = m_context.getInstanceInterface();
688         const auto      physDev                 = m_context.getPhysicalDevice();
689         const auto&     vkd                             = m_context.getDeviceInterface();
690         const auto      device                  = m_context.getDevice();
691         auto&           alloc                   = m_context.getDefaultAllocator();
692         const auto      queue                   = m_context.getUniversalQueue();
693         const auto      qIndex                  = m_context.getUniversalQueueFamilyIndex();
694
695         const auto      colorFormat             = getColorFormat();
696         const auto      dsFormat                = chooseDepthStencilFormat(vki, physDev);
697         const auto      tcuColorFormat  = mapVkFormat(colorFormat);
698         const auto      triangleCount   = getTriangleCount();
699         const auto      imageDim                = static_cast<deUint32>(deSqrt(static_cast<double>(triangleCount)));
700         const auto      imageExtent             = makeExtent3D(imageDim, imageDim, 1u);
701         const auto      imageLayers             = (m_params.multiview ? 2u : 1u);
702         const auto      colorUsage              = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
703         const auto      dsUsage                 = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
704         const auto      pixelCount              = imageExtent.width * imageExtent.height;
705         const auto      vertexCount             = pixelCount * 3u; // Triangle list.
706         const auto      isIndexed               = (m_params.drawType == DrawType::INDEXED);
707         const auto      isMixedMode             = (isIndexed && m_params.vertexOffset && m_params.vertexOffset->offsetType == VertexOffsetType::MIXED);
708         const auto      extraVertices   = (m_params.vertexOffset ? m_params.vertexOffset->offset : 0u);
709         const auto      isMosaic                = (m_params.meshType == MeshType::MOSAIC);
710
711         // Make sure we're providing a vertex offset for indexed cases.
712         DE_ASSERT(!isIndexed || static_cast<bool>(m_params.vertexOffset));
713
714         // Make sure overlapping draws use a single instance.
715         DE_ASSERT(isMosaic || m_params.instanceCount <= 1u);
716
717         // Color buffer.
718         const VkImageCreateInfo imageCreateInfo =
719         {
720                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,    //      VkStructureType                 sType;
721                 nullptr,                                                                //      const void*                             pNext;
722                 0u,                                                                             //      VkImageCreateFlags              flags;
723                 VK_IMAGE_TYPE_2D,                                               //      VkImageType                             imageType;
724                 colorFormat,                                                    //      VkFormat                                format;
725                 imageExtent,                                                    //      VkExtent3D                              extent;
726                 1u,                                                                             //      deUint32                                mipLevels;
727                 imageLayers,                                                    //      deUint32                                arrayLayers;
728                 VK_SAMPLE_COUNT_1_BIT,                                  //      VkSampleCountFlagBits   samples;
729                 VK_IMAGE_TILING_OPTIMAL,                                //      VkImageTiling                   tiling;
730                 colorUsage,                                                             //      VkImageUsageFlags               usage;
731                 VK_SHARING_MODE_EXCLUSIVE,                              //      VkSharingMode                   sharingMode;
732                 0u,                                                                             //      deUint32                                queueFamilyIndexCount;
733                 nullptr,                                                                //      const deUint32*                 pQueueFamilyIndices;
734                 VK_IMAGE_LAYOUT_UNDEFINED,                              //      VkImageLayout                   initialLayout;
735         };
736
737         ImageWithMemory colorBuffer                             (vkd, device, alloc, imageCreateInfo, MemoryRequirement::Any);
738         const auto              colorSubresourceRange   = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, imageLayers);
739         const auto              colorBufferView                 = makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, colorSubresourceRange);
740
741         // Depth/stencil buffer.
742         const VkImageCreateInfo dsCreateInfo =
743         {
744                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,    //      VkStructureType                 sType;
745                 nullptr,                                                                //      const void*                             pNext;
746                 0u,                                                                             //      VkImageCreateFlags              flags;
747                 VK_IMAGE_TYPE_2D,                                               //      VkImageType                             imageType;
748                 dsFormat,                                                               //      VkFormat                                format;
749                 imageExtent,                                                    //      VkExtent3D                              extent;
750                 1u,                                                                             //      deUint32                                mipLevels;
751                 imageLayers,                                                    //      deUint32                                arrayLayers;
752                 VK_SAMPLE_COUNT_1_BIT,                                  //      VkSampleCountFlagBits   samples;
753                 VK_IMAGE_TILING_OPTIMAL,                                //      VkImageTiling                   tiling;
754                 dsUsage,                                                                //      VkImageUsageFlags               usage;
755                 VK_SHARING_MODE_EXCLUSIVE,                              //      VkSharingMode                   sharingMode;
756                 0u,                                                                             //      deUint32                                queueFamilyIndexCount;
757                 nullptr,                                                                //      const deUint32*                 pQueueFamilyIndices;
758                 VK_IMAGE_LAYOUT_UNDEFINED,                              //      VkImageLayout                   initialLayout;
759         };
760
761         ImageWithMemory dsBuffer                        (vkd, device, alloc, dsCreateInfo, MemoryRequirement::Any);
762         const auto              dsSubresourceRange      = makeImageSubresourceRange((VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT), 0u, 1u, 0u, imageLayers);
763         const auto              dsBufferView            = makeImageView(vkd, device, dsBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, dsFormat, dsSubresourceRange);
764
765         // Output buffers to verify attachments.
766         using BufferWithMemoryPtr = de::MovePtr<BufferWithMemory>;
767
768         // Buffers to read color attachment.
769         const auto outputBufferSize = pixelCount * static_cast<VkDeviceSize>(tcu::getPixelSize(tcuColorFormat));
770         const auto bufferCreateInfo = makeBufferCreateInfo(outputBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
771
772         std::vector<BufferWithMemoryPtr> outputBuffers;
773         for (deUint32 i = 0u; i < imageLayers; ++i)
774                 outputBuffers.push_back(BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, bufferCreateInfo, MemoryRequirement::HostVisible)));
775
776         // Buffer to read depth/stencil attachment. Note: this supposes we'll only copy the stencil aspect. See below.
777         const auto                      tcuStencilFmt                   = mapVkFormat(getStencilVerificationFormat());
778         const auto                      stencilOutBufferSize    = pixelCount * static_cast<VkDeviceSize>(tcu::getPixelSize(tcuStencilFmt));
779         const auto                      stencilOutCreateInfo    = makeBufferCreateInfo(stencilOutBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
780
781         std::vector<BufferWithMemoryPtr> stencilOutBuffers;
782         for (deUint32 i = 0u; i < imageLayers; ++i)
783                 stencilOutBuffers.push_back(BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, stencilOutCreateInfo, MemoryRequirement::HostVisible)));
784
785         // Shaders.
786         const auto                              vertModule = createShaderModule(vkd, device, m_context.getBinaryCollection().get("vert"), 0u);
787         const auto                              fragModule = createShaderModule(vkd, device, m_context.getBinaryCollection().get("frag"), 0u);
788         Move<VkShaderModule>    tescModule;
789         Move<VkShaderModule>    teseModule;
790         Move<VkShaderModule>    geomModule;
791
792         if (m_params.useGeometry)
793                 geomModule = createShaderModule(vkd, device, m_context.getBinaryCollection().get("geom"), 0u);
794
795         if (m_params.useTessellation)
796         {
797                 tescModule = createShaderModule(vkd, device, m_context.getBinaryCollection().get("tesc"), 0u);
798                 teseModule = createShaderModule(vkd, device, m_context.getBinaryCollection().get("tese"), 0u);
799         }
800
801         DescriptorSetLayoutBuilder      layoutBuilder;
802         const auto                                      descriptorSetLayout     = layoutBuilder.build(vkd, device);
803         const auto                                      pipelineLayout          = makePipelineLayout(vkd, device, descriptorSetLayout.get());
804
805         // Render pass.
806         const auto renderPass = makeMultidrawRenderPass(vkd, device, colorFormat, dsFormat, imageLayers);
807
808         // Framebuffer (note layers is always 1 as required by the spec).
809         const std::vector<VkImageView> attachments { colorBufferView.get(), dsBufferView.get() };
810         const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), static_cast<deUint32>(attachments.size()), de::dataOrNull(attachments), imageExtent.width, imageExtent.height, 1u);
811
812         // Viewports and scissors.
813         const auto                                              viewport        = makeViewport(imageExtent);
814         const std::vector<VkViewport>   viewports       (1u, viewport);
815         const auto                                              scissor         = makeRect2D(imageExtent);
816         const std::vector<VkRect2D>             scissors        (1u, scissor);
817
818         // Indexed draws will have triangle vertices in reverse order. See index buffer creation below.
819         const auto                                                                              frontFace                       = (isIndexed ? VK_FRONT_FACE_COUNTER_CLOCKWISE : VK_FRONT_FACE_CLOCKWISE);
820         const VkPipelineRasterizationStateCreateInfo    rasterizationInfo       =
821         {
822                 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,     //      VkStructureType                                                 sType;
823                 nullptr,                                                                                                        //      const void*                                                             pNext;
824                 0u,                                                                                                                     //      VkPipelineRasterizationStateCreateFlags flags;
825                 VK_FALSE,                                                                                                       //      VkBool32                                                                depthClampEnable;
826                 VK_FALSE,                                                                                                       //      VkBool32                                                                rasterizerDiscardEnable;
827                 VK_POLYGON_MODE_FILL,                                                                           //      VkPolygonMode                                                   polygonMode;
828                 VK_CULL_MODE_BACK_BIT,                                                                          //      VkCullModeFlags                                                 cullMode;
829                 frontFace,                                                                                                      //      VkFrontFace                                                             frontFace;
830                 VK_FALSE,                                                                                                       //      VkBool32                                                                depthBiasEnable;
831                 0.0f,                                                                                                           //      float                                                                   depthBiasConstantFactor;
832                 0.0f,                                                                                                           //      float                                                                   depthBiasClamp;
833                 0.0f,                                                                                                           //      float                                                                   depthBiasSlopeFactor;
834                 1.0f,                                                                                                           //      float                                                                   lineWidth;
835         };
836
837         const auto frontStencilState    = makeStencilOpState(VK_STENCIL_OP_KEEP, VK_STENCIL_OP_INCREMENT_AND_WRAP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS, 0xFFu, 0xFFu, 0u);
838         const auto backStencilState             = makeStencilOpState(VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_NEVER, 0xFFu, 0xFFu, 0u);
839         const auto depthTestEnable              = (isMosaic ? VK_FALSE : VK_TRUE);
840         const auto depthWriteEnable             = depthTestEnable;
841         const auto depthCompareOp               = (isMosaic ? VK_COMPARE_OP_ALWAYS : (isIndexed ? VK_COMPARE_OP_GREATER : VK_COMPARE_OP_LESS));
842
843         const VkPipelineDepthStencilStateCreateInfo depthStencilInfo =
844         {
845                 VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,             //      VkStructureType                                                 sType;
846                 nullptr,                                                                                                                //      const void*                                                             pNext;
847                 0u,                                                                                                                             //      VkPipelineDepthStencilStateCreateFlags  flags;
848                 depthTestEnable,                                                                                                //      VkBool32                                                                depthTestEnable;
849                 depthWriteEnable,                                                                                               //      VkBool32                                                                depthWriteEnable;
850                 depthCompareOp,                                                                                                 //      VkCompareOp                                                             depthCompareOp;
851                 VK_FALSE,                                                                                                               //      VkBool32                                                                depthBoundsTestEnable;
852                 VK_TRUE,                                                                                                                //      VkBool32                                                                stencilTestEnable;
853                 frontStencilState,                                                                                              //      VkStencilOpState                                                front;
854                 backStencilState,                                                                                               //      VkStencilOpState                                                back;
855                 0.0f,                                                                                                                   //      float                                                                   minDepthBounds;
856                 1.0f,                                                                                                                   //      float                                                                   maxDepthBounds;
857         };
858
859         const auto primitiveTopology    = (m_params.useTessellation ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
860         const auto patchControlPoints   = (m_params.useTessellation ? 3u : 0u);
861
862         // Pipelines.
863         std::vector<Move<VkPipeline>> pipelines;
864         pipelines.reserve(imageLayers);
865         for (deUint32 subpassIdx = 0u; subpassIdx < imageLayers; ++subpassIdx)
866         {
867                 pipelines.emplace_back(makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
868                         vertModule.get(), tescModule.get(), teseModule.get(), geomModule.get(), fragModule.get(),
869                         renderPass.get(), viewports, scissors, primitiveTopology, subpassIdx, patchControlPoints,
870                         nullptr/*vertexInputStateCreateInfo*/, &rasterizationInfo, nullptr/*multisampleStateCreateInfo*/, &depthStencilInfo));
871         }
872
873         // Command pool and buffer.
874         const auto cmdPool              = makeCommandPool(vkd, device, qIndex);
875         const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
876         const auto cmdBuffer    = cmdBufferPtr.get();
877
878         // Create vertex buffer.
879         std::vector<tcu::Vec4> triangleVertices;
880         triangleVertices.reserve(vertexCount + extraVertices);
881
882         // Vertex count per draw call.
883         const bool atLeastOneDraw       = (m_params.drawCount > 0u);
884         const bool moreThanOneDraw      = (m_params.drawCount > 1u);
885         const auto trianglesPerDraw     = (atLeastOneDraw ? pixelCount / m_params.drawCount : 0u);
886         const auto verticesPerDraw      = trianglesPerDraw * 3u;
887
888         if (atLeastOneDraw)
889                 DE_ASSERT(pixelCount % m_params.drawCount == 0u);
890
891         {
892                 using TriangleGeneratorPtr = de::MovePtr<TriangleGenerator>;
893                 TriangleGeneratorPtr triangleGen;
894
895                 if (m_params.meshType == MeshType::MOSAIC)
896                         triangleGen = TriangleGeneratorPtr(new TriangleMosaicGenerator(imageExtent.width, imageExtent.height));
897                 else if (m_params.meshType == MeshType::OVERLAPPING)
898                         triangleGen = TriangleGeneratorPtr(new TriangleOverlapGenerator(imageExtent.width, imageExtent.height));
899                 else
900                         DE_ASSERT(false);
901
902                 // When applying a vertex offset in nonmixed modes, there will be a few extra vertices at the start of the vertex buffer.
903                 if (isIndexed && !isMixedMode)
904                         appendPaddingVertices(triangleVertices, extraVertices);
905
906                 for (deUint32 y = 0u; y < imageExtent.height; ++y)
907                 for (deUint32 x = 0u; x < imageExtent.width; ++x)
908                 {
909                         // When applying a vertex offset in mixed mode, there will be some extra padding between the triangles for the first
910                         // block and the rest, so that the vertex offset will not be constant in all draw info structures. This way, the first
911                         // triangles will always have offset zero, and the number of them depends on the given draw count.
912                         const auto pixelIndex = y * imageExtent.width + x;
913                         if (isIndexed && isMixedMode && moreThanOneDraw && pixelIndex == trianglesPerDraw)
914                                 appendPaddingVertices(triangleVertices, extraVertices);
915
916                         triangleGen->appendTriangle(x, y, triangleVertices);
917                 }
918         }
919
920         const auto                      vertexBufferSize        = static_cast<VkDeviceSize>(de::dataSize(triangleVertices));
921         const auto                      vertexBufferInfo        = makeBufferCreateInfo(vertexBufferSize, (VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
922         BufferWithMemory        vertexBuffer            (vkd, device, alloc, vertexBufferInfo, MemoryRequirement::HostVisible);
923         auto&                           vertexBufferAlloc       = vertexBuffer.getAllocation();
924         const auto                      vertexBufferOffset      = vertexBufferAlloc.getOffset();
925         void*                           vertexBufferData        = vertexBufferAlloc.getHostPtr();
926
927         deMemcpy(vertexBufferData, triangleVertices.data(), de::dataSize(triangleVertices));
928         flushAlloc(vkd, device, vertexBufferAlloc);
929
930         // Index buffer if needed.
931         de::MovePtr<BufferWithMemory>   indexBuffer;
932         VkDeviceSize                                    indexBufferOffset = 0ull;
933
934         if (isIndexed)
935         {
936                 // Indices will be given in reverse order, so they effectively also make the triangles have reverse winding order.
937                 std::vector<deUint32> indices;
938                 indices.reserve(vertexCount);
939                 for (deUint32 i = 0u; i < vertexCount; ++i)
940                         indices.push_back(vertexCount - i - 1u);
941
942                 const auto      indexBufferSize         = static_cast<VkDeviceSize>(de::dataSize(indices));
943                 const auto      indexBufferInfo         = makeBufferCreateInfo(indexBufferSize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
944                                         indexBuffer                     = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, alloc, indexBufferInfo, MemoryRequirement::HostVisible));
945                 auto&           indexBufferAlloc        = indexBuffer->getAllocation();
946                                         indexBufferOffset       = indexBufferAlloc.getOffset();
947                 void*           indexBufferData         = indexBufferAlloc.getHostPtr();
948
949                 deMemcpy(indexBufferData, indices.data(), de::dataSize(indices));
950                 flushAlloc(vkd, device, indexBufferAlloc);
951         }
952
953         // Prepare draw information.
954         const auto offsetType   = (m_params.vertexOffset ? m_params.vertexOffset->offsetType : tcu::nothing<VertexOffsetType>());
955         const auto vertexOffset = static_cast<deInt32>(extraVertices);
956
957         DrawInfoPacker drawInfos(m_params.drawType, offsetType, m_params.stride, m_params.drawCount, m_params.seed);
958
959         if (m_params.drawCount > 0u)
960         {
961                 deUint32 vertexIndex = 0u;
962                 for (deUint32 drawIdx = 0u; drawIdx < m_params.drawCount; ++drawIdx)
963                 {
964                         // For indexed draws in mixed offset mode, taking into account vertex indices have been stored in reversed order and
965                         // there may be a padding in the vertex buffer after the first verticesPerDraw vertices, we need to use offset 0 in the
966                         // last draw call. That draw will contain the indices for the first verticesPerDraw vertices, which are stored without
967                         // any offset, while other draw calls will use indices which are off by extraVertices vertices. This will make sure not
968                         // every draw call will use the same offset and the implementation handles that.
969                         const auto drawOffset = ((isIndexed && (!isMixedMode || (moreThanOneDraw && drawIdx < m_params.drawCount - 1u))) ? vertexOffset : 0);
970                         drawInfos.addDrawInfo(vertexIndex, verticesPerDraw, drawOffset);
971                         vertexIndex += verticesPerDraw;
972                 }
973         }
974
975         beginCommandBuffer(vkd, cmdBuffer);
976
977         // Draw stuff.
978         std::vector<VkClearValue> clearValues;
979         clearValues.reserve(2u);
980         clearValues.push_back(makeClearValueColorU32(0u, 0u, 0u, 0u));
981         clearValues.push_back(makeClearValueDepthStencil(((isMosaic || isIndexed) ? 0.0f : 1.0f), 0u));
982
983         beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissor, static_cast<deUint32>(clearValues.size()), de::dataOrNull(clearValues));
984
985         for (deUint32 layerIdx = 0u; layerIdx < imageLayers; ++layerIdx)
986         {
987                 if (layerIdx > 0u)
988                         vkd.cmdNextSubpass(cmdBuffer, VK_SUBPASS_CONTENTS_INLINE);
989
990                 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelines[layerIdx].get());
991                 vkd.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &vertexBuffer.get(), &vertexBufferOffset);
992                 if (isIndexed)
993                         vkd.cmdBindIndexBuffer(cmdBuffer, indexBuffer->get(), indexBufferOffset, VK_INDEX_TYPE_UINT32);
994
995                 if (isIndexed)
996                 {
997                         const auto drawInfoPtr  = reinterpret_cast<const VkMultiDrawIndexedInfoEXT*>(drawInfos.drawInfoData());
998                         const auto offsetPtr    = (isMixedMode ? nullptr : &vertexOffset);
999                         vkd.cmdDrawMultiIndexedEXT(cmdBuffer, drawInfos.drawInfoCount(), drawInfoPtr, m_params.instanceCount, m_params.firstInstance, drawInfos.stride(), offsetPtr);
1000                 }
1001                 else
1002                 {
1003                         const auto drawInfoPtr = reinterpret_cast<const VkMultiDrawInfoEXT*>(drawInfos.drawInfoData());
1004                         vkd.cmdDrawMultiEXT(cmdBuffer, drawInfos.drawInfoCount(), drawInfoPtr, m_params.instanceCount, m_params.firstInstance, drawInfos.stride());
1005                 }
1006         }
1007
1008         endRenderPass(vkd, cmdBuffer);
1009
1010         // Prepare images for copying.
1011         const auto colorBufferBarrier = makeImageMemoryBarrier(
1012                 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
1013                 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1014                 colorBuffer.get(), colorSubresourceRange);
1015         vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &colorBufferBarrier);
1016
1017         const auto dsBufferBarrier = makeImageMemoryBarrier(
1018                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
1019                 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1020                 dsBuffer.get(), dsSubresourceRange);
1021         vkd.cmdPipelineBarrier(cmdBuffer, (VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT), VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &dsBufferBarrier);
1022
1023         // Copy images to output buffers.
1024         for (deUint32 layerIdx = 0u; layerIdx < imageLayers; ++layerIdx)
1025         {
1026                 const auto colorSubresourceLayers       = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, layerIdx, 1u);
1027                 const auto colorCopyRegion                      = makeBufferImageCopy(imageExtent, colorSubresourceLayers);
1028                 vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, outputBuffers[layerIdx]->get(), 1u, &colorCopyRegion);
1029         }
1030
1031         // Note: this only copies the stencil aspect. See stencilOutBuffer creation.
1032         for (deUint32 layerIdx = 0u; layerIdx < imageLayers; ++layerIdx)
1033         {
1034                 const auto stencilSubresourceLayers     = makeImageSubresourceLayers(VK_IMAGE_ASPECT_STENCIL_BIT, 0u, layerIdx, 1u);
1035                 const auto stencilCopyRegion            = makeBufferImageCopy(imageExtent, stencilSubresourceLayers);
1036                 vkd.cmdCopyImageToBuffer(cmdBuffer, dsBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, stencilOutBuffers[layerIdx]->get(), 1u, &stencilCopyRegion);
1037         }
1038
1039         // Prepare buffers for host reading.
1040         const auto outputBufferBarrier          = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1041         vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &outputBufferBarrier, 0u, nullptr, 0u, nullptr);
1042
1043         endCommandBuffer(vkd, cmdBuffer);
1044         submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1045
1046         // Read output buffers and verify their contents.
1047
1048         // With stride zero, mosaic meshes increment the stencil buffer as many times as draw operations for affected pixels and
1049         // overlapping meshes increment the stencil buffer only in the first draw operation (the rest fail the depth test) as many times
1050         // as triangles per draw.
1051         //
1052         // With nonzero stride, mosaic meshes increment the stencil buffer once per pixel. Overlapping meshes increment it once per
1053         // triangle.
1054         const auto      stencilIncrements               =       ((m_params.stride == 0u)
1055                                                                                         ? (isMosaic ? drawInfos.drawInfoCount() : trianglesPerDraw)
1056                                                                                         : (isMosaic ? 1u : triangleCount));
1057         const auto      maxInstanceIndex                = m_params.maxInstanceIndex();
1058         const auto      colorVerificationFormat = mapVkFormat(getVerificationFormat());
1059         const auto      iWidth                                  = static_cast<int>(imageExtent.width);
1060         const auto      iHeight                                 = static_cast<int>(imageExtent.height);
1061         auto&           log                                             = m_context.getTestContext().getLog();
1062         const auto      logMode                                 = tcu::CompareLogMode::COMPARE_LOG_ON_ERROR;
1063
1064         for (deUint32 layerIdx = 0u; layerIdx < imageLayers; ++layerIdx)
1065         {
1066                 auto& outputBufferAlloc = outputBuffers[layerIdx]->getAllocation();
1067                 invalidateAlloc(vkd, device, outputBufferAlloc);
1068                 const void* outputBufferData = outputBufferAlloc.getHostPtr();
1069
1070                 auto& stencilOutBufferAlloc = stencilOutBuffers[layerIdx]->getAllocation();
1071                 invalidateAlloc(vkd, device, stencilOutBufferAlloc);
1072                 const void* stencilOutBufferData = stencilOutBufferAlloc.getHostPtr();
1073
1074                 tcu::ConstPixelBufferAccess     colorAccess                             (colorVerificationFormat, iWidth, iHeight, 1, outputBufferData);
1075                 tcu::ConstPixelBufferAccess     stencilAccess                   (tcuStencilFmt, iWidth, iHeight, 1, stencilOutBufferData);
1076
1077                 // Generate reference images.
1078                 tcu::TextureLevel                       refColorLevel           (colorVerificationFormat, iWidth, iHeight);
1079                 tcu::PixelBufferAccess          refColorAccess          = refColorLevel.getAccess();
1080                 tcu::TextureLevel                       refStencilLevel         (tcuStencilFmt, iWidth, iHeight);
1081                 tcu::PixelBufferAccess          refStencilAccess        = refStencilLevel.getAccess();
1082                 tcu::IVec4                                      referenceColor;
1083                 int                                                     referenceStencil;
1084
1085                 for (int y = 0; y < iHeight; ++y)
1086                 for (int x = 0; x < iWidth; ++x)
1087                 {
1088                         const auto pixelNumber          = static_cast<deUint32>(y * iWidth + x);
1089                         const auto triangleIndex        = (isIndexed ? (pixelCount - 1u - pixelNumber) : pixelNumber); // Reverse order for indexed draws.
1090
1091                         if (m_params.instanceCount == 0u || drawInfos.drawInfoCount() == 0u ||
1092                                 (m_params.stride == 0u && triangleIndex >= trianglesPerDraw && isMosaic))
1093                         {
1094                                 // Some pixels may not be drawn into when there are no instances or draws, or when the stride is zero in mosaic mode.
1095                                 referenceColor          = tcu::IVec4(0, 0, 0, 0);
1096                                 referenceStencil        = 0;
1097                         }
1098                         else
1099                         {
1100                                 // This must match the vertex shader.
1101                                 //
1102                                 // With stride zero, the same block is drawn over and over again in each draw call. This affects both the draw index and
1103                                 // the values in the depth/stencil buffer and, with overlapping meshes, only the first draw passes the depth test.
1104                                 //
1105                                 // With nonzero stride, the draw index depends on the triangle index and the number of triangles per draw and, for
1106                                 // overlapping meshes, the draw index is always the last one.
1107                                 const auto drawIndex =  (m_params.stride == 0u
1108                                                                                 ? (isMosaic ? (drawInfos.drawInfoCount() - 1u) : 0u)
1109                                                                                 : (isMosaic ? (triangleIndex / trianglesPerDraw) : (drawInfos.drawInfoCount() - 1u)));
1110                                 referenceColor = tcu::IVec4(
1111                                         static_cast<int>((drawIndex >> 8) & 0xFFu),
1112                                         static_cast<int>((drawIndex     ) & 0xFFu),
1113                                         static_cast<int>(255u - maxInstanceIndex),
1114                                         static_cast<int>(255u - layerIdx));
1115
1116                                 referenceStencil = static_cast<int>((m_params.instanceCount * stencilIncrements) % 256u); // VK_STENCIL_OP_INCREMENT_AND_WRAP.
1117                         }
1118
1119                         refColorAccess.setPixel(referenceColor, x, y);
1120                         refStencilAccess.setPixStencil(referenceStencil, x, y);
1121                 }
1122
1123                 const auto layerIdxStr          = de::toString(layerIdx);
1124                 const auto colorSetName         = "ColorTestResultLayer" + layerIdxStr;
1125                 const auto stencilSetName       = "StencilTestResultLayer" + layerIdxStr;
1126
1127                 if (!tcu::intThresholdCompare(log, colorSetName.c_str(), "", refColorAccess, colorAccess, tcu::UVec4(0u, 0u, 0u, 0u), logMode))
1128                         return tcu::TestStatus::fail("Color image comparison failed; check log for more details");
1129
1130                 if (!tcu::dsThresholdCompare(log, stencilSetName.c_str(), "", refStencilAccess, stencilAccess, 0.0f, logMode))
1131                         return tcu::TestStatus::fail("Stencil image comparison failed; check log for more details");
1132         }
1133
1134         return tcu::TestStatus::pass("Pass");
1135 }
1136
1137 } // anonymous
1138
1139 tcu::TestCaseGroup*     createDrawMultiExtTests (tcu::TestContext& testCtx)
1140 {
1141         using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
1142
1143         GroupPtr drawMultiGroup (new tcu::TestCaseGroup(testCtx, "multi_draw", "VK_EXT_multi_draw tests"));
1144
1145         const struct
1146         {
1147                 MeshType        meshType;
1148                 const char*     name;
1149         } meshTypeCases[] =
1150         {
1151                 { MeshType::MOSAIC,                     "mosaic"                },
1152                 { MeshType::OVERLAPPING,        "overlapping"   },
1153         };
1154
1155         const struct
1156         {
1157                 DrawType        drawType;
1158                 const char*     name;
1159         } drawTypeCases[] =
1160         {
1161                 { DrawType::NORMAL,             "normal"        },
1162                 { DrawType::INDEXED,    "indexed"       },
1163         };
1164
1165         const struct
1166         {
1167                 tcu::Maybe<VertexOffsetType>    vertexOffsetType;
1168                 const char*                                             name;
1169         } offsetTypeCases[] =
1170         {
1171                 { tcu::nothing<VertexOffsetType>(),             ""                      },
1172                 { VertexOffsetType::MIXED,                              "mixed"         },
1173                 { VertexOffsetType::CONSTANT_RANDOM,    "random"        },
1174                 { VertexOffsetType::CONSTANT_PACK,              "packed"        },
1175         };
1176
1177         const struct
1178         {
1179                 deUint32        drawCount;
1180                 const char*     name;
1181         } drawCountCases[] =
1182         {
1183                 { 0u,                                   "no_draws"      },
1184                 { 1u,                                   "one_draw"      },
1185                 { 16u,                                  "16_draws"      },
1186                 { getTriangleCount(),   "max_draws"     },
1187         };
1188
1189         const struct
1190         {
1191                 int                     extraBytes;
1192                 const char*     name;
1193         } strideCases[] =
1194         {
1195                 { -1,           "stride_zero"           },
1196                 {  0,           "standard_stride"       },
1197                 {  4,           "stride_extra_4"        },
1198                 { 12,           "stride_extra_12"       },
1199         };
1200
1201         const struct
1202         {
1203                 deUint32        firstInstance;
1204                 deUint32        instanceCount;
1205                 const char*     name;
1206         } instanceCases[] =
1207         {
1208                 {       0u,             0u,             "no_instances"                  },
1209                 {       0u,             1u,             "1_instance"                    },
1210                 {       0u,             10u,    "10_instances"                  },
1211                 {       3u,             2u,             "2_instances_base_3"    },
1212         };
1213
1214         const struct
1215         {
1216                 bool            useTessellation;
1217                 bool            useGeometry;
1218                 const char*     name;
1219         } shaderCases[] =
1220         {
1221                 { false,        false,          "vert_only"     },
1222                 { false,        true,           "with_geom"     },
1223                 { true,         false,          "with_tess"     },
1224                 { true,         true,           "tess_geom"     },
1225         };
1226
1227         const struct
1228         {
1229                 bool            multiview;
1230                 const char*     name;
1231         } multiviewCases[] =
1232         {
1233                 { false,        "single_view"   },
1234                 { true,         "multiview"             },
1235         };
1236
1237         constexpr deUint32 kSeed = 1621260419u;
1238
1239         for (const auto& meshTypeCase : meshTypeCases)
1240         {
1241                 GroupPtr meshTypeGroup(new tcu::TestCaseGroup(testCtx, meshTypeCase.name, ""));
1242
1243                 for (const auto& drawTypeCase : drawTypeCases)
1244                 {
1245                         for (const auto& offsetTypeCase : offsetTypeCases)
1246                         {
1247                                 const auto hasOffsetType = static_cast<bool>(offsetTypeCase.vertexOffsetType);
1248                                 if ((drawTypeCase.drawType == DrawType::NORMAL && hasOffsetType) ||
1249                                         (drawTypeCase.drawType == DrawType::INDEXED && !hasOffsetType))
1250                                 {
1251                                         continue;
1252                                 }
1253
1254                                 std::string drawGroupName = drawTypeCase.name;
1255                                 if (hasOffsetType)
1256                                         drawGroupName += std::string("_") + offsetTypeCase.name;
1257
1258                                 GroupPtr drawTypeGroup(new tcu::TestCaseGroup(testCtx, drawGroupName.c_str(), ""));
1259
1260                                 for (const auto& drawCountCase : drawCountCases)
1261                                 {
1262                                         GroupPtr drawCountGroup(new tcu::TestCaseGroup(testCtx, drawCountCase.name, ""));
1263
1264                                         for (const auto& strideCase : strideCases)
1265                                         {
1266                                                 GroupPtr strideGroup(new tcu::TestCaseGroup(testCtx, strideCase.name, ""));
1267
1268                                                 for (const auto& instanceCase : instanceCases)
1269                                                 {
1270                                                         GroupPtr instanceGroup(new tcu::TestCaseGroup(testCtx, instanceCase.name, ""));
1271
1272                                                         for (const auto& shaderCase : shaderCases)
1273                                                         {
1274                                                                 GroupPtr shaderGroup(new tcu::TestCaseGroup(testCtx, shaderCase.name, ""));
1275
1276                                                                 for (const auto& multiviewCase : multiviewCases)
1277                                                                 {
1278                                                                         GroupPtr multiviewGroup(new tcu::TestCaseGroup(testCtx, multiviewCase.name, ""));
1279
1280                                                                         const auto      isIndexed       = (drawTypeCase.drawType == DrawType::INDEXED);
1281                                                                         const auto      isPacked        = (offsetTypeCase.vertexOffsetType && *offsetTypeCase.vertexOffsetType == VertexOffsetType::CONSTANT_PACK);
1282                                                                         const auto      baseStride      = ((isIndexed && !isPacked) ? sizeof(VkMultiDrawIndexedInfoEXT) : sizeof(VkMultiDrawInfoEXT));
1283                                                                         const auto&     extraBytes      = strideCase.extraBytes;
1284                                                                         const auto      testOffset      = (isIndexed ? VertexOffsetParams{*offsetTypeCase.vertexOffsetType, 0u } : tcu::nothing<VertexOffsetParams>());
1285                                                                         deUint32        testStride      = 0u;
1286
1287                                                                         if (extraBytes >= 0)
1288                                                                                 testStride = static_cast<deUint32>(baseStride) + static_cast<deUint32>(extraBytes);
1289
1290                                                                         // For overlapping triangles we will skip instanced drawing.
1291                                                                         if (instanceCase.instanceCount > 1u && meshTypeCase.meshType == MeshType::OVERLAPPING)
1292                                                                                 continue;
1293
1294                                                                         TestParams params =
1295                                                                         {
1296                                                                                 meshTypeCase.meshType,                  //      MeshType                                                meshType;
1297                                                                                 drawTypeCase.drawType,                  //      DrawType                                                drawType;
1298                                                                                 drawCountCase.drawCount,                //      deUint32                                                drawCount;
1299                                                                                 instanceCase.instanceCount,             //      deUint32                                                instanceCount;
1300                                                                                 instanceCase.firstInstance,             //      deUint32                                                firstInstance;
1301                                                                                 testStride,                                             //      deUint32                                                stride;
1302                                                                                 testOffset,                                             //      tcu::Maybe<VertexOffsetParams>> vertexOffset;   // Only used for indexed draws.
1303                                                                                 kSeed,                                                  //      deUint32                                                seed;
1304                                                                                 shaderCase.useTessellation,             //      bool                                                    useTessellation;
1305                                                                                 shaderCase.useGeometry,                 //      bool                                                    useGeometry;
1306                                                                                 multiviewCase.multiview,                //      bool                                                    multiview;
1307                                                                         };
1308
1309                                                                         multiviewGroup->addChild(new MultiDrawTest(testCtx, "no_offset", "", params));
1310
1311                                                                         if (isIndexed)
1312                                                                         {
1313                                                                                 params.vertexOffset->offset = 6u;
1314                                                                                 multiviewGroup->addChild(new MultiDrawTest(testCtx, "offset_6", "", params));
1315                                                                         }
1316
1317                                                                         shaderGroup->addChild(multiviewGroup.release());
1318                                                                 }
1319
1320                                                                 instanceGroup->addChild(shaderGroup.release());
1321                                                         }
1322
1323                                                         strideGroup->addChild(instanceGroup.release());
1324                                                 }
1325
1326                                                 drawCountGroup->addChild(strideGroup.release());
1327                                         }
1328
1329                                         drawTypeGroup->addChild(drawCountGroup.release());
1330                                 }
1331
1332                                 meshTypeGroup->addChild(drawTypeGroup.release());
1333                         }
1334                 }
1335
1336                 drawMultiGroup->addChild(meshTypeGroup.release());
1337         }
1338
1339         return drawMultiGroup.release();
1340 }
1341
1342 } // Draw
1343 } // vkt