1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2021 The Khronos Group Inc.
6 * Copyright (c) 2021 Valve Corporation.
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * \brief Mesh Shader Query Tests for VK_EXT_mesh_shader
23 *//*--------------------------------------------------------------------*/
25 #include "vktMeshShaderQueryTestsEXT.hpp"
26 #include "vktMeshShaderUtil.hpp"
27 #include "vktTestCase.hpp"
28 #include "vktTestCaseUtil.hpp"
30 #include "vkImageWithMemory.hpp"
31 #include "vkBufferWithMemory.hpp"
32 #include "vkImageUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkObjUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkBarrierUtil.hpp"
38 #include "tcuImageCompare.hpp"
39 #include "tcuTextureUtil.hpp"
41 #include "deRandom.hpp"
42 #include "deUniquePtr.hpp"
// Movable pointer alias for BufferWithMemory objects (indirect buffers, query results buffer).
62 using BufferWithMemoryPtr = de::MovePtr<BufferWithMemory>;
// Width of the color image in pixels; also the column count used by the mesh shader.
64 constexpr uint32_t kImageWidth = 32u;
// Work groups launched per draw call: mesh work groups when no task shader is used, task work groups otherwise.
65 constexpr uint32_t kMeshWorkGroupsPerCall = 4u;
66 constexpr uint32_t kTaskWorkGroupsPerCall = 2u;
// Mesh work groups each task work group has to emit so the per-call total stays at kMeshWorkGroupsPerCall.
67 constexpr uint32_t kMeshWorkGroupsPerTask = kMeshWorkGroupsPerCall / kTaskWorkGroupsPerCall;
// Mesh shader local size (local_size_x/y/z) and the resulting invocations per work group.
69 constexpr uint32_t kMeshLocalInvocationsX = 10u;
70 constexpr uint32_t kMeshLocalInvocationsY = 4u;
71 constexpr uint32_t kMeshLocalInvocationsZ = 1u;
72 constexpr uint32_t kMeshLocalInvocations = kMeshLocalInvocationsX * kMeshLocalInvocationsY * kMeshLocalInvocationsZ;
// Task shader local size (local_size_x/y/z) and the resulting invocations per work group.
74 constexpr uint32_t kTaskLocalInvocationsX = 1u;
75 constexpr uint32_t kTaskLocalInvocationsY = 4u;
76 constexpr uint32_t kTaskLocalInvocationsZ = 6u;
77 constexpr uint32_t kTaskLocalInvocations = kTaskLocalInvocationsX * kTaskLocalInvocationsY * kTaskLocalInvocationsZ;
// Byte sizes of 64-bit and 32-bit query result items.
79 constexpr VkDeviceSize k64sz = static_cast<VkDeviceSize>(sizeof(uint64_t));
80 constexpr VkDeviceSize k32sz = static_cast<VkDeviceSize>(sizeof(uint32_t));
// Kind of draw command recorded by the test: DIRECT, INDIRECT or INDIRECT_WITH_COUNT (see recordDraws()).
89 enum class DrawCallType
// Primitive type produced by the mesh shader: POINTS, LINES or TRIANGLES.
96 enum class GeometryType
// Maps a geometry type to the name used in the mesh shader output layout qualifier
// ("points", "lines" or "triangles").
103 std::string toString (GeometryType geometryType)
106 switch (geometryType)
108 case GeometryType::POINTS: result = "points"; break;
109 case GeometryType::LINES: result = "lines"; break;
110 case GeometryType::TRIANGLES: result = "triangles"; break;
// Number of vertices that make up one primitive of the given geometry type
// (1 for points, 2 for lines, 3 for triangles).
118 uint32_t vertsPerPrimitive (GeometryType geometryType)
// Stays at zero for any geometry type not handled by the switch below.
120 uint32_t vertices = 0u;
121 switch (geometryType)
123 case GeometryType::POINTS: vertices = 1u; break;
124 case GeometryType::LINES: vertices = 2u; break;
125 case GeometryType::TRIANGLES: vertices = 3u; break;
136 NONE_WITH_HOST, // After checking results normally, reset query from the host and verify availability.
// How query results are retrieved; AccessMethod::COPY uses a transfer-destination results buffer.
141 enum class AccessMethod
// Checks the result code of a query results retrieval.
// - result: the VkResult to check.
// - allowNotReady: when true, VK_NOT_READY is accepted in addition to VK_SUCCESS.
// Any other result is reported through TCU_FAIL using the string form of the result.
147 void checkGetQueryRes(VkResult result, bool allowNotReady)
149 if (result == VK_SUCCESS || (result == VK_NOT_READY && allowNotReady))
152 const auto msg = getResultStr(result);
153 TCU_FAIL(msg.toString());
156 // The pseudorandom number generator will be used in the test case and test instance, so we use two seeds per case.
157 uint32_t getNewSeed (void)
// Function-local static: the seed state persists between calls.
159 static uint32_t seed = 1656078156u;
// Snapshot of the current seed value.
160 uint32_t returnedSeed = seed;
// Query types enabled for this case (checked with hasQueryType()).
168 std::vector<QueryType> queryTypes;
// Number of draws in each block; the total is computed by getTotalDrawCount().
169 std::vector<uint32_t> drawBlocks;
// Draw command flavor and primitive type generated by the mesh shader.
170 DrawCallType drawCall;
171 GeometryType geometry;
// Adds VK_QUERY_RESULT_WITH_AVAILABILITY_BIT to the query result flags.
175 bool availabilityBit;
// Together with useSecondary, decides whether queries are inherited (see areQueriesInherited()).
178 bool insideRenderPass;
// Exchanges the contents of this object with other, member by member.
// Vectors use their own swap(); every other member goes through std::swap.
182 void swap (TestParams& other)
184 std::swap(randomSeed, other.randomSeed);
185 queryTypes.swap(other.queryTypes);
186 drawBlocks.swap(other.drawBlocks);
187 std::swap(drawCall, other.drawCall);
188 std::swap(geometry, other.geometry);
189 std::swap(resetType, other.resetType);
190 std::swap(access, other.access);
191 std::swap(use64Bits, other.use64Bits);
192 std::swap(availabilityBit, other.availabilityBit);
193 std::swap(waitBit, other.waitBit);
194 std::swap(useTaskShader, other.useTaskShader);
195 std::swap(insideRenderPass, other.insideRenderPass);
196 std::swap(useSecondary, other.useSecondary);
197 std::swap(multiView, other.multiView);
// Default state: a fresh seed from getNewSeed(), direct draws of points, no reset case,
// results accessed by copy, and the boolean options listed below disabled.
201 : randomSeed (getNewSeed())
204 , drawCall (DrawCallType::DIRECT)
205 , geometry (GeometryType::POINTS)
206 , resetType (ResetCase::NONE)
207 , access (AccessMethod::COPY)
209 , availabilityBit (false)
211 , useTaskShader (false)
212 , insideRenderPass (false)
213 , useSecondary (false)
// Copy constructor: duplicates every member of other, including its random seed
// (no new seed is requested from getNewSeed()).
217 TestParams (const TestParams& other)
218 : randomSeed (other.randomSeed)
219 , queryTypes (other.queryTypes)
220 , drawBlocks (other.drawBlocks)
221 , drawCall (other.drawCall)
222 , geometry (other.geometry)
223 , resetType (other.resetType)
224 , access (other.access)
225 , use64Bits (other.use64Bits)
226 , availabilityBit (other.availabilityBit)
227 , waitBit (other.waitBit)
228 , useTaskShader (other.useTaskShader)
229 , insideRenderPass (other.insideRenderPass)
230 , useSecondary (other.useSecondary)
231 , multiView (other.multiView)
// Move constructor.
234 TestParams (TestParams&& other)
// Total number of draws across all draw blocks.
240 uint32_t getTotalDrawCount (void) const
// Sum of every entry in drawBlocks (zero when there are no blocks).
242 const uint32_t callCount = std::accumulate(drawBlocks.begin(), drawBlocks.end(), 0u);
// Height of the color image in rows: kMeshWorkGroupsPerCall rows for each draw.
// The result is zero when there are no draws.
246 uint32_t getImageHeight (void) const
248 return getTotalDrawCount() * kMeshWorkGroupsPerCall;
251 // The goal is dispatching 4 mesh work groups per draw call in total. When not using task shaders, we dispatch that number
252 // directly. When using task shaders, we dispatch 2 task work groups that will dispatch 2 mesh work groups each. The axis will
253 // be pseudorandomly chosen in each case.
// Returns the number of work groups to launch from the draw command itself.
254 uint32_t getDrawGroupCount (void) const
256 return (useTaskShader ? kTaskWorkGroupsPerCall : kMeshWorkGroupsPerCall);
259 // Gets the right query result flags for the current parameters.
// - 64-bit results when use64Bits is set.
// - Availability value appended when availabilityBit is set.
// - Either the wait bit or, when not waiting, the partial bit.
260 VkQueryResultFlags getQueryResultFlags (void) const
262 const VkQueryResultFlags queryResultFlags = ( (use64Bits ? VK_QUERY_RESULT_64_BIT : 0)
263 | (availabilityBit ? VK_QUERY_RESULT_WITH_AVAILABILITY_BIT : 0)
264 | (waitBit ? VK_QUERY_RESULT_WAIT_BIT : VK_QUERY_RESULT_PARTIAL_BIT) );
265 return queryResultFlags;
268 // Queries will be inherited if they are started outside of a render pass and using secondary command buffers.
269 // - If secondary command buffers are not used, nothing will be inherited.
270 // - If secondary command buffers are used but queries start inside of a render pass, queries will run entirely inside the secondary command buffer.
// checkSupport() requires the inherited queries feature when this returns true.
271 bool areQueriesInherited (void) const
273 return (useSecondary && !insideRenderPass);
// Returns true if the given query type is present in queryTypes.
277 bool hasQueryType (QueryType queryType) const
279 return de::contains(queryTypes.begin(), queryTypes.end(), queryType);
// True when the case includes the generated primitives query (QueryType::PRIMITIVES).
283 bool hasPrimitivesQuery (void) const
285 return hasQueryType(QueryType::PRIMITIVES);
// True when the case includes the mesh shader invocations statistic (QueryType::MESH_INVOCATIONS).
288 bool hasMeshInvStat (void) const
290 return hasQueryType(QueryType::MESH_INVOCATIONS);
// True when the case includes the task shader invocations statistic (QueryType::TASK_INVOCATIONS).
293 bool hasTaskInvStat (void) const
295 return hasQueryType(QueryType::TASK_INVOCATIONS);
// Layout information for query results, as filled in by getQuerySizesAndOffsets().
298 struct QuerySizesAndOffsets
// Size in bytes of a single result item (k64sz or k32sz).
300 VkDeviceSize queryItemSize;
// Size in bytes of one primitives query result, including the availability item if requested.
301 VkDeviceSize primitivesQuerySize;
// Size in bytes of one statistics query result, including the availability item if requested.
302 VkDeviceSize statsQuerySize;
// Byte offset at which the statistics results start, after the primitives results of all views (if any).
303 VkDeviceSize statsQueryOffset;
// Number of views rendered: 2 for multiview cases, 1 otherwise.
306 uint32_t getViewCount (void) const
308 return (multiView ? 2u : 1u);
// Computes item size, per-query result sizes and the statistics results offset for the current parameters.
311 QuerySizesAndOffsets getQuerySizesAndOffsets (void) const
313 QuerySizesAndOffsets sizesAndOffsets;
// One extra item per query result when the availability value is requested.
314 const VkDeviceSize extraQueryItems = (availabilityBit ? 1ull : 0ull);
315 const VkDeviceSize viewMultiplier = getViewCount();
317 sizesAndOffsets.queryItemSize = (use64Bits ? k64sz : k32sz);
// Primitives query: one counter plus the optional availability item.
318 sizesAndOffsets.primitivesQuerySize = (extraQueryItems + 1ull) * sizesAndOffsets.queryItemSize;
// Statistics query: one counter per enabled statistic plus the optional availability item.
319 sizesAndOffsets.statsQuerySize = (extraQueryItems + (hasTaskInvStat() ? 1ull : 0ull) + (hasMeshInvStat() ? 1ull : 0ull)) * sizesAndOffsets.queryItemSize;
// Statistics results are placed after the primitives results of every view, when those exist.
320 sizesAndOffsets.statsQueryOffset = (hasPrimitivesQuery() ? (sizesAndOffsets.primitivesQuerySize * viewMultiplier) : 0ull);
322 return sizesAndOffsets;
// Test case for mesh shader queries: builds the shaders, checks support and creates the test instance.
326 class MeshQueryCase : public vkt::TestCase
// Takes the test parameters by rvalue reference and moves them into m_params.
329 MeshQueryCase (tcu::TestContext& testCtx, const std::string& name, const std::string& description, TestParams&& params)
330 : vkt::TestCase (testCtx, name, description)
331 , m_params (std::move(params))
333 virtual ~MeshQueryCase (void) {}
335 void initPrograms (vk::SourceCollections& programCollection) const override;
336 TestInstance* createInstance (Context& context) const override;
337 void checkSupport (Context& context) const override;
// Test instance for mesh shader queries: records the draws and queries and submits the work.
343 class MeshQueryInstance : public vkt::TestInstance
// The indirect buffers start empty and are created on demand in recordDraws(); the fence is created up front.
346 MeshQueryInstance (Context& context, const TestParams& params)
347 : vkt::TestInstance (context)
349 , m_rnd (params.randomSeed + 1u) // Add 1 to make the instance seed different.
350 , m_indirectBuffer ()
351 , m_indirectCountBuffer ()
352 , m_fence (createFence(context.getDeviceInterface(), context.getDevice()))
354 virtual ~MeshQueryInstance (void) {}
356 Move<VkRenderPass> makeCustomRenderPass (const DeviceInterface& vkd, VkDevice device, uint32_t layerCount, VkFormat format);
357 tcu::TestStatus iterate (void) override;
// Helpers used while recording and submitting the test commands.
360 VkDrawMeshTasksIndirectCommandEXT getRandomShuffle (uint32_t groupCount);
361 void recordDraws (const VkCommandBuffer cmdBuffer, const VkPipeline pipeline, const VkPipelineLayout layout);
362 void beginFirstQueries (const VkCommandBuffer cmdBuffer, const std::vector<VkQueryPool>& queryPools) const;
363 void endFirstQueries (const VkCommandBuffer cmdBuffer, const std::vector<VkQueryPool>& queryPools) const;
364 void resetFirstQueries (const VkCommandBuffer cmdBuffer, const std::vector<VkQueryPool>& queryPools, const uint32_t queryCount) const;
365 void submitCommands (const VkCommandBuffer cmdBuffer) const;
366 void waitForFence () const;
// Non-owning pointer to the test parameters.
368 const TestParams* m_params;
// Buffers holding the indirect draw commands and the indirect draw counts (see recordDraws()).
370 BufferWithMemoryPtr m_indirectBuffer;
371 BufferWithMemoryPtr m_indirectCountBuffer;
// Fence signaled by submitCommands() and waited on by waitForFence().
372 Move<VkFence> m_fence;
// Generates the GLSL sources for the test: a fragment shader, a mesh shader and, when
// m_params.useTaskShader is set, a task shader. Sources are added to programCollection
// under the names "frag", "mesh" and "task".
375 void MeshQueryCase::initPrograms (vk::SourceCollections &programCollection) const
377 const auto meshBuildOpts = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
378 const auto imageHeight = m_params.getImageHeight();
// Task payload declaration; the mesh shader includes it when a task shader is used.
// It has one "branch" entry per task shader local invocation.
380 const std::string taskDataDecl =
381 "struct TaskData {\n"
382 " uint branch[" + std::to_string(kTaskLocalInvocations) + "];\n"
384 "taskPayloadSharedEXT TaskData td;\n"
// Fragment shader: constant color, with the view index in the green channel for multiview cases.
387 std::ostringstream frag;
390 << (m_params.multiView ? "#extension GL_EXT_multiview : enable\n" : "")
391 << "layout (location=0) out vec4 outColor;\n"
392 << "void main (void) { outColor = vec4(0.0, " << (m_params.multiView ? "float(gl_ViewIndex)" : "0.0") << ", 1.0, 1.0); }\n"
394 programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
// Mesh shader: invocations take a column index from a shared counter and, for columns inside
// the image, write one primitive centered on the corresponding pixel of the work group's row.
396 std::ostringstream mesh;
399 << "#extension GL_EXT_mesh_shader : enable\n"
401 << "layout (local_size_x=" << kMeshLocalInvocationsX << ", local_size_y=" << kMeshLocalInvocationsY << ", local_size_z=" << kMeshLocalInvocationsZ << ") in;\n"
402 << "layout (" << toString(m_params.geometry) << ") out;\n"
403 << "layout (max_vertices=256, max_primitives=256) out;\n"
405 << "layout (push_constant, std430) uniform PushConstants {\n"
406 << " uint prevDrawCalls;\n"
411 if (m_params.useTaskShader)
412 mesh << taskDataDecl << "\n";
416 << "shared uint currentCol;\n"
418 << "void main (void)\n"
420 << " atomicExchange(currentCol, 0u);\n"
423 << " const uint colCount = uint(" << kImageWidth << ");\n"
424 << " const uint rowCount = uint(" << imageHeight << ");\n"
425 << " const uint rowsPerDraw = uint(" << kMeshWorkGroupsPerCall << ");\n"
427 << " const float pixWidth = 2.0 / float(colCount);\n"
428 << " const float pixHeight = 2.0 / float(rowCount);\n"
429 << " const float horDelta = pixWidth / 4.0;\n"
430 << " const float verDelta = pixHeight / 4.0;\n"
// The row is derived from the draws recorded before this command (push constant), the draw index
// within the command and the work group index. With task shaders, the work group index also
// includes the task work group index stored in the payload, scaled by 2.
432 << " const uint DrawIndex = uint(gl_DrawID);\n"
433 << " const uint currentWGIndex = (" << (m_params.useTaskShader ? "2u * td.branch[min(gl_LocalInvocationIndex, " + std::to_string(kTaskLocalInvocations - 1u) + ")] + " : "") << "gl_WorkGroupID.x + gl_WorkGroupID.y + gl_WorkGroupID.z);\n"
434 << " const uint row = (pc.prevDrawCalls + DrawIndex) * rowsPerDraw + currentWGIndex;\n"
435 << " const uint vertsPerPrimitive = " << vertsPerPrimitive(m_params.geometry) << ";\n"
437 << " SetMeshOutputsEXT(32u, 32u);\n"
439 << " const uint col = atomicAdd(currentCol, 1);\n"
440 << " if (col < colCount)\n"
442 << " const float xCenter = (float(col) + 0.5) / colCount * 2.0 - 1.0;\n"
443 << " const float yCenter = (float(row) + 0.5) / rowCount * 2.0 - 1.0;\n"
445 << " const uint firstVert = col * vertsPerPrimitive;\n"
// Geometry-specific vertex positions and primitive indices, all kept around the pixel center.
449 switch (m_params.geometry)
451 case GeometryType::POINTS:
453 << " gl_MeshVerticesEXT[firstVert].gl_Position = vec4(xCenter, yCenter, 0.0, 1.0);\n"
454 << " gl_MeshVerticesEXT[firstVert].gl_PointSize = 1.0;\n"
455 << " gl_PrimitivePointIndicesEXT[col] = firstVert;\n"
458 case GeometryType::LINES:
460 << " gl_MeshVerticesEXT[firstVert + 0].gl_Position = vec4(xCenter - horDelta, yCenter, 0.0, 1.0);\n"
461 << " gl_MeshVerticesEXT[firstVert + 1].gl_Position = vec4(xCenter + horDelta, yCenter, 0.0, 1.0);\n"
462 << " gl_PrimitiveLineIndicesEXT[col] = uvec2(firstVert, firstVert + 1);\n"
465 case GeometryType::TRIANGLES:
467 << " gl_MeshVerticesEXT[firstVert + 0].gl_Position = vec4(xCenter , yCenter - verDelta, 0.0, 1.0);\n"
468 << " gl_MeshVerticesEXT[firstVert + 1].gl_Position = vec4(xCenter - horDelta, yCenter + verDelta, 0.0, 1.0);\n"
469 << " gl_MeshVerticesEXT[firstVert + 2].gl_Position = vec4(xCenter + horDelta, yCenter + verDelta, 0.0, 1.0);\n"
470 << " gl_PrimitiveTriangleIndicesEXT[col] = uvec3(firstVert, firstVert + 1, firstVert + 2);\n"
482 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << meshBuildOpts;
// Task shader (optional): stores the task work group index in the payload and emits
// kMeshWorkGroupsPerTask mesh work groups along an axis chosen with the case seed.
484 if (m_params.useTaskShader)
486 // See TestParams::getDrawGroupCount().
487 de::Random rnd (m_params.randomSeed);
488 std::vector<uint32_t> meshTaskCount {kMeshWorkGroupsPerTask, 1u, 1u};
490 rnd.shuffle(meshTaskCount.begin(), meshTaskCount.end());
492 std::ostringstream task;
495 << "#extension GL_EXT_mesh_shader : enable\n"
497 << "layout (local_size_x=" << kTaskLocalInvocationsX << ", local_size_y=" << kTaskLocalInvocationsY << ", local_size_z=" << kTaskLocalInvocationsZ << ") in;\n"
503 << " td.branch[gl_LocalInvocationIndex] = gl_WorkGroupID.x + gl_WorkGroupID.y + gl_WorkGroupID.z;\n"
504 << " EmitMeshTasksEXT(" << meshTaskCount.at(0) << ", " << meshTaskCount.at(1) << ", " << meshTaskCount.at(2) << ");\n"
507 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << meshBuildOpts;
// Creates the test instance for this case. The instance receives a reference to m_params,
// not a copy of it.
511 TestInstance* MeshQueryCase::createInstance (Context& context) const
513 return new MeshQueryInstance(context, m_params);
// Verifies the implementation supports everything the case needs, throwing NotSupportedError otherwise:
// - Mesh shaders (and task shaders when used) plus the meshShaderQueries feature.
// - Inherited queries when the queries would be inherited by a secondary command buffer.
// - VK_EXT_host_query_reset for the host reset case.
// - Multiview mesh shaders with enough views for multiview cases.
516 void MeshQueryCase::checkSupport (Context& context) const
518 checkTaskMeshShaderSupportEXT(context, m_params.useTaskShader/*requireTask*/, true/*requireMesh*/);
520 const auto& meshFeatures = context.getMeshShaderFeaturesEXT();
521 if (!meshFeatures.meshShaderQueries)
522 TCU_THROW(NotSupportedError, "meshShaderQueries not supported");
524 if (m_params.areQueriesInherited())
525 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_INHERITED_QUERIES);
527 if (m_params.resetType == ResetCase::NONE_WITH_HOST)
528 context.requireDeviceFunctionality("VK_EXT_host_query_reset");
530 if (m_params.multiView)
532 if (!meshFeatures.multiviewMeshShader)
533 TCU_THROW(NotSupportedError, "multiviewMeshShader not supported");
535 const auto& meshProperties = context.getMeshShaderPropertiesEXT();
536 if (meshProperties.maxMeshMultiviewViewCount < m_params.getViewCount())
537 TCU_THROW(NotSupportedError, "maxMeshMultiviewViewCount too low");
// Builds a draw command that launches groupCount work groups along one axis, with the axis
// picked using the instance random number generator; the other two dimensions are 1.
541 VkDrawMeshTasksIndirectCommandEXT MeshQueryInstance::getRandomShuffle (uint32_t groupCount)
543 std::array<uint32_t, 3> counts { groupCount, 1u, 1u };
544 m_rnd.shuffle(counts.begin(), counts.end());
546 const VkDrawMeshTasksIndirectCommandEXT result { counts[0], counts[1], counts[2] };
// Binds the pipeline and records every draw for the case in cmdBuffer, using direct draws,
// indirect draws or indirect draws with count depending on m_params->drawCall.
// - cmdBuffer: command buffer in recording state.
// - pipeline: graphics pipeline to bind.
// - layout: pipeline layout, used for the mesh-stage push constant.
// Indirect variants create m_indirectBuffer (and m_indirectCountBuffer) as a side effect, so those
// buffers stay alive as members after recording.
550 void MeshQueryInstance::recordDraws (const VkCommandBuffer cmdBuffer, const VkPipeline pipeline, const VkPipelineLayout layout)
552 const auto& vkd = m_context.getDeviceInterface();
553 const auto device = m_context.getDevice();
554 auto& alloc = m_context.getDefaultAllocator();
555 const auto drawGroupCount = m_params->getDrawGroupCount();
556 const auto pcSize = static_cast<uint32_t>(sizeof(uint32_t));
558 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
// Direct draws: each draw gets a push constant update followed by its own draw call with
// pseudorandomly shuffled group counts.
560 if (m_params->drawCall == DrawCallType::DIRECT)
562 uint32_t totalDrawCalls = 0u;
563 for (const auto& blockSize : m_params->drawBlocks)
565 for (uint32_t drawIdx = 0u; drawIdx < blockSize; ++drawIdx)
567 const auto counts = getRandomShuffle(drawGroupCount);
568 vkd.cmdPushConstants(cmdBuffer, layout, VK_SHADER_STAGE_MESH_BIT_EXT, 0u, pcSize, &totalDrawCalls);
569 vkd.cmdDrawMeshTasksEXT(cmdBuffer, counts.groupCountX, counts.groupCountY, counts.groupCountZ);
// Indirect draws: one indirect command per draw is generated up front and stored in a buffer.
574 else if (m_params->drawCall == DrawCallType::INDIRECT || m_params->drawCall == DrawCallType::INDIRECT_WITH_COUNT)
576 if (m_params->drawBlocks.empty())
579 const auto totalDrawCount = m_params->getTotalDrawCount();
580 const auto cmdSize = static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandEXT));
582 std::vector<VkDrawMeshTasksIndirectCommandEXT> indirectCommands;
583 indirectCommands.reserve(totalDrawCount);
585 for (uint32_t i = 0u; i < totalDrawCount; ++i)
586 indirectCommands.emplace_back(getRandomShuffle(drawGroupCount));
588 // Copy the array to a host-visible buffer.
589 // Note: We make sure all indirect buffers are allocated with a non-zero size by adding cmdSize to the expected size.
590 const auto indirectBufferSize = de::dataSize(indirectCommands);
591 const auto indirectBufferCreateInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(indirectBufferSize + cmdSize), VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT);
593 m_indirectBuffer = BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, indirectBufferCreateInfo, MemoryRequirement::HostVisible));
594 auto& indirectBufferAlloc = m_indirectBuffer->getAllocation();
595 void* indirectBufferData = indirectBufferAlloc.getHostPtr();
597 deMemcpy(indirectBufferData, indirectCommands.data(), indirectBufferSize);
598 flushAlloc(vkd, device, indirectBufferAlloc);
// Plain indirect: one indirect draw per block, starting at the command that follows the previous
// blocks; the push constant carries the number of draws recorded so far.
600 if (m_params->drawCall == DrawCallType::INDIRECT)
602 uint32_t accumulatedCount = 0u;
604 for (const auto& blockSize : m_params->drawBlocks)
606 const auto offset = static_cast<VkDeviceSize>(cmdSize * accumulatedCount);
607 vkd.cmdPushConstants(cmdBuffer, layout, VK_SHADER_STAGE_MESH_BIT_EXT, 0u, pcSize, &accumulatedCount);
608 vkd.cmdDrawMeshTasksIndirectEXT(cmdBuffer, m_indirectBuffer->get(), offset, blockSize, cmdSize);
609 accumulatedCount += blockSize;
614 // Copy the "block sizes" to a host-visible buffer.
615 const auto indirectCountBufferSize = de::dataSize(m_params->drawBlocks);
616 const auto indirectCountBufferCreateInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(indirectCountBufferSize + cmdSize), VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT);
618 m_indirectCountBuffer = BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, indirectCountBufferCreateInfo, MemoryRequirement::HostVisible));
619 auto& indirectCountBufferAlloc = m_indirectCountBuffer->getAllocation();
620 void* indirectCountBufferData = indirectCountBufferAlloc.getHostPtr();
622 deMemcpy(indirectCountBufferData, m_params->drawBlocks.data(), indirectCountBufferSize);
623 flushAlloc(vkd, device, indirectCountBufferAlloc);
625 // Record indirect draws with count.
626 uint32_t accumulatedCount = 0u;
628 for (uint32_t countIdx = 0u; countIdx < m_params->drawBlocks.size(); ++countIdx)
630 const auto& blockSize = m_params->drawBlocks.at(countIdx);
631 const auto offset = static_cast<VkDeviceSize>(cmdSize * accumulatedCount);
632 const auto countOffset = static_cast<VkDeviceSize>(sizeof(uint32_t) * countIdx);
634 vkd.cmdPushConstants(cmdBuffer, layout, VK_SHADER_STAGE_MESH_BIT_EXT, 0u, pcSize, &accumulatedCount);
// The count buffer holds the block size, while maxDrawCount is twice that value, so the count
// buffer is what limits the number of draws.
// NOTE(review): the indirect buffer only holds (total draws + 1) commands, so for the last block
// offset + stride * (maxDrawCount - 1) + cmdSize can exceed the buffer size when blockSize > 1.
// Confirm against the maxDrawCount valid usage rules for vkCmdDrawMeshTasksIndirectCountEXT.
635 vkd.cmdDrawMeshTasksIndirectCountEXT(cmdBuffer, m_indirectBuffer->get(), offset, m_indirectCountBuffer->get(), countOffset, blockSize * 2u, cmdSize);
636 accumulatedCount += blockSize;
// Begins query 0 in each of the given query pools, with no query control flags.
646 void MeshQueryInstance::beginFirstQueries (const VkCommandBuffer cmdBuffer, const std::vector<VkQueryPool>& queryPools) const
648 const auto& vkd = m_context.getDeviceInterface();
649 for (const auto& pool : queryPools)
650 vkd.cmdBeginQuery(cmdBuffer, pool, 0u, 0u);
// Ends query 0 in each of the given query pools.
653 void MeshQueryInstance::endFirstQueries (const VkCommandBuffer cmdBuffer, const std::vector<VkQueryPool>& queryPools) const
655 const auto& vkd = m_context.getDeviceInterface();
656 for (const auto& pool : queryPools)
657 vkd.cmdEndQuery(cmdBuffer, pool, 0u);
// Records a reset of the first queryCount queries (starting at query 0) in each of the given query pools.
660 void MeshQueryInstance::resetFirstQueries (const VkCommandBuffer cmdBuffer, const std::vector<VkQueryPool>& queryPools, const uint32_t queryCount) const
662 const auto& vkd = m_context.getDeviceInterface();
663 for (const auto& pool : queryPools)
664 vkd.cmdResetQueryPool(cmdBuffer, pool, 0u, queryCount);
// Submits the given command buffer to the universal queue without waiting for or signaling any
// semaphore. The submission signals m_fence; this function does not wait for completion.
667 void MeshQueryInstance::submitCommands (const VkCommandBuffer cmdBuffer) const
669 const auto& vkd = m_context.getDeviceInterface();
670 const auto queue = m_context.getUniversalQueue();
672 const VkSubmitInfo submitInfo =
674 VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType sType;
675 nullptr, // const void* pNext;
676 0u, // deUint32 waitSemaphoreCount;
677 nullptr, // const VkSemaphore* pWaitSemaphores;
678 nullptr, // const VkPipelineStageFlags* pWaitDstStageMask;
679 1u, // deUint32 commandBufferCount;
680 &cmdBuffer, // const VkCommandBuffer* pCommandBuffers;
681 0u, // deUint32 signalSemaphoreCount;
682 nullptr, // const VkSemaphore* pSignalSemaphores;
685 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, m_fence.get()));
// Blocks until m_fence is signaled, using the maximum timeout value.
688 void MeshQueryInstance::waitForFence (void) const
690 const auto& vkd = m_context.getDeviceInterface();
691 const auto device = m_context.getDevice();
693 VK_CHECK(vkd.waitForFences(device, 1u, &m_fence.get(), VK_TRUE, ~0ull));
696 // Read query item from memory. Always returns uint64_t for convenience. Advances pointer to the next item.
// - ptr: address of the read pointer.
// - itemSize: size of the item in bytes; the visible branches handle k64sz and k32sz.
697 uint64_t readFromPtrAndAdvance (uint8_t** const ptr, VkDeviceSize itemSize)
699 const auto itemSizeSz = static_cast<size_t>(itemSize);
// Starts as the maximum 64-bit value until an item is actually read.
700 uint64_t result = std::numeric_limits<uint64_t>::max();
702 if (itemSize == k64sz)
704 deMemcpy(&result, *ptr, itemSizeSz);
706 else if (itemSize == k32sz)
// 32-bit items are read into a temporary and widened to 64 bits.
708 uint32_t aux = std::numeric_limits<uint32_t>::max();
709 deMemcpy(&aux, *ptr, itemSizeSz);
710 result = static_cast<uint64_t>(aux);
719 // General procedure to verify correctness of the availability bit, which does not depend on the exact query.
// - resultsPtr: address of the read pointer; the availability item is consumed from it.
// - itemSize: size in bytes of each result item.
// - params: test parameters (reset case and wait bit decide the expected value).
// - queryName: name used as a prefix in error messages.
720 void readAndVerifyAvailabilityBit (uint8_t** const resultsPtr, VkDeviceSize itemSize, const TestParams& params, const std::string& queryName)
722 const uint64_t availabilityBitVal = readFromPtrAndAdvance(resultsPtr, itemSize);
// After a reset before access, the query must be reported as unavailable.
724 if (params.resetType == ResetCase::BEFORE_ACCESS)
726 if (availabilityBitVal)
728 std::ostringstream msg;
729 msg << queryName << " availability bit expected to be zero due to reset before access, but found " << availabilityBitVal;
// Without that reset, using the wait bit means the query must be reported as available.
733 else if (params.waitBit)
735 if (!availabilityBitVal)
737 std::ostringstream msg;
738 msg << queryName << " availability expected to be true due to wait bit and not previous reset, but found " << availabilityBitVal;
744 // Verifies a query counter has the right value given the test parameters.
745 // - readVal is the reported counter value.
746 // - expectedMinVal and expectedMaxVal are the known right counts under "normal" circumstances.
747 // - The actual range of valid values will be adjusted depending on the test parameters (wait bit, reset, etc).
// - queryName is used as a prefix in the error message.
748 void verifyQueryCounter (uint64_t readVal, uint64_t expectedMinVal, uint64_t expectedMaxVal, const TestParams& params, const std::string& queryName)
750 uint64_t minVal = expectedMinVal;
751 uint64_t maxVal = expectedMaxVal;
753 const bool wasReset = (params.resetType == ResetCase::BEFORE_ACCESS);
// Results obtained without waiting, or after a reset, do not have to match the normal range.
755 if (!params.waitBit || wasReset)
761 if (!de::inRange(readVal, minVal, maxVal))
763 std::ostringstream msg;
764 msg << queryName << " not in expected range: " << readVal << " out of [" << minVal << ", " << maxVal << "]";
// Creates a render pass with a single subpass and a single color attachment, with a multiview
// structure chained that enables one view per layer.
// - vkd, device: device interface and device used to create the render pass.
// - layerCount: number of views; must be greater than zero.
// - format: format of the color attachment.
769 Move<VkRenderPass> MeshQueryInstance::makeCustomRenderPass (const DeviceInterface& vkd, VkDevice device, uint32_t layerCount, VkFormat format)
771 DE_ASSERT(layerCount > 0u);
// The attachment is cleared on load, stored at the end and left in the color attachment layout.
773 const VkAttachmentDescription colorAttachmentDescription =
775 0u, // VkAttachmentDescriptionFlags flags
776 format, // VkFormat format
777 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples
778 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp
779 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp
780 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp
781 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp
782 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout
783 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout
786 const VkAttachmentReference colorAttachmentRef = makeAttachmentReference(0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
788 const VkSubpassDescription subpassDescription =
790 0u, // VkSubpassDescriptionFlags flags
791 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint
792 0u, // deUint32 inputAttachmentCount
793 nullptr, // const VkAttachmentReference* pInputAttachments
794 1u, // deUint32 colorAttachmentCount
795 &colorAttachmentRef, // const VkAttachmentReference* pColorAttachments
796 nullptr, // const VkAttachmentReference* pResolveAttachments
797 nullptr, // const VkAttachmentReference* pDepthStencilAttachment
798 0u, // deUint32 preserveAttachmentCount
799 nullptr // const deUint32* pPreserveAttachments
// One bit set per layer; the same mask is used as the subpass view mask and as the correlation mask.
802 const uint32_t viewMask = ((1u << layerCount) - 1u);
803 const VkRenderPassMultiviewCreateInfo multiviewCreateInfo =
805 VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, // VkStructureType sType;
806 nullptr, // const void* pNext;
807 1u, // uint32_t subpassCount;
808 &viewMask, // const uint32_t* pViewMasks;
809 0u, // uint32_t dependencyCount;
810 nullptr, // const int32_t* pViewOffsets;
811 1u, // uint32_t correlationMaskCount;
812 &viewMask, // const uint32_t* pCorrelationMasks;
// The multiview structure is always chained through pNext, including when layerCount is 1.
815 const VkRenderPassCreateInfo renderPassInfo =
817 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType
818 &multiviewCreateInfo, // const void* pNext
819 0u, // VkRenderPassCreateFlags flags
820 1u, // deUint32 attachmentCount
821 &colorAttachmentDescription, // const VkAttachmentDescription* pAttachments
822 1u, // deUint32 subpassCount
823 &subpassDescription, // const VkSubpassDescription* pSubpasses
824 0u, // deUint32 dependencyCount
825 nullptr, // const VkSubpassDependency* pDependencies
828 return createRenderPass(vkd, device, &renderPassInfo);
831 tcu::TestStatus MeshQueryInstance::iterate (void)
833 const auto& vkd = m_context.getDeviceInterface();
834 const auto device = m_context.getDevice();
835 auto& alloc = m_context.getDefaultAllocator();
836 const auto queue = m_context.getUniversalQueue();
837 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
839 const auto colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
840 const auto colorTcuFormat = mapVkFormat(colorFormat);
841 const auto colorExtent = makeExtent3D(kImageWidth, std::max(m_params->getImageHeight(), 1u), 1u);
842 const auto viewCount = m_params->getViewCount();
843 const tcu::IVec3 colorTcuExtent (static_cast<int>(colorExtent.width), static_cast<int>(colorExtent.height), static_cast<int>(viewCount));
844 const auto colorUsage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
845 const tcu::Vec4 clearColor (0.0f, 0.0f, 0.0f, 1.0f);
846 const auto expectedPrims = (m_params->getImageHeight() * kImageWidth);
847 const auto expectedTaskInv = (m_params->useTaskShader ? (m_params->getImageHeight() * kTaskLocalInvocations / 2u) : 0u);
848 const auto expectedMeshInv = m_params->getImageHeight() * kMeshLocalInvocations;
849 const auto imageViewType = ((viewCount > 1u) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
852 const VkImageCreateInfo colorBufferCreateInfo =
854 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
855 nullptr, // const void* pNext;
856 0u, // VkImageCreateFlags flags;
857 VK_IMAGE_TYPE_2D, // VkImageType imageType;
858 colorFormat, // VkFormat format;
859 colorExtent, // VkExtent3D extent;
860 1u, // uint32_t mipLevels;
861 viewCount, // uint32_t arrayLayers;
862 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
863 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
864 colorUsage, // VkImageUsageFlags usage;
865 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
866 0u, // uint32_t queueFamilyIndexCount;
867 nullptr, // const uint32_t* pQueueFamilyIndices;
868 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
871 const ImageWithMemory colorBuffer (vkd, device, alloc, colorBufferCreateInfo, MemoryRequirement::Any);
872 const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, viewCount);
873 const auto colorSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, viewCount);
874 const auto colorView = makeImageView(vkd, device, colorBuffer.get(), imageViewType, colorFormat, colorSRR);
876 // Verification buffer.
877 DE_ASSERT(colorExtent.depth == 1u);
878 const VkDeviceSize verifBufferSize = colorExtent.width * colorExtent.height * viewCount * static_cast<VkDeviceSize>(tcu::getPixelSize(colorTcuFormat));
879 const auto verifBufferCreateInfo = makeBufferCreateInfo(verifBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
880 const BufferWithMemory verifBuffer (vkd, device, alloc, verifBufferCreateInfo, MemoryRequirement::HostVisible);
883 const auto& binaries = m_context.getBinaryCollection();
884 const auto taskModule = (binaries.contains("task")
885 ? createShaderModule(vkd, device, binaries.get("task"))
886 : Move<VkShaderModule>());
887 const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
888 const auto fragModule = createShaderModule(vkd, device, binaries.get("frag"));
891 const auto pcSize = static_cast<uint32_t>(sizeof(uint32_t));
892 const auto pcRange = makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_EXT, 0u, pcSize);
893 const auto pipelineLayout = makePipelineLayout(vkd, device, DE_NULL, &pcRange);
895 // Render pass, framebuffer, viewports, scissors.
896 const auto renderPass = makeCustomRenderPass(vkd, device, viewCount, colorFormat);
897 const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), colorExtent.width, colorExtent.height);
899 const std::vector<VkViewport> viewports (1u, makeViewport(colorExtent));
900 const std::vector<VkRect2D> scissors (1u, makeRect2D(colorExtent));
902 const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
903 taskModule.get(), meshModule.get(), fragModule.get(),
904 renderPass.get(), viewports, scissors);
906 // Command pool and buffers.
907 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
908 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
909 const auto resetCmdBuffer = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
910 const auto cmdBuffer = cmdBufferPtr.get();
911 const auto rawPipeline = pipeline.get();
912 const auto rawPipeLayout = pipelineLayout.get();
914 Move<VkCommandBuffer> secCmdBufferPtr;
915 VkCommandBuffer secCmdBuffer = DE_NULL;
917 if (m_params->useSecondary)
919 secCmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_SECONDARY);
920 secCmdBuffer = secCmdBufferPtr.get();
923 // Create the query pools that we need.
924 Move<VkQueryPool> primitivesQueryPool;
925 Move<VkQueryPool> statsQueryPool;
927 const bool hasPrimitivesQuery = m_params->hasPrimitivesQuery();
928 const bool hasMeshInvStat = m_params->hasMeshInvStat();
929 const bool hasTaskInvStat = m_params->hasTaskInvStat();
930 const bool hasStatsQuery = (hasMeshInvStat || hasTaskInvStat);
932 std::vector<VkQueryPool> allQueryPools;
934 if (hasPrimitivesQuery)
936 const VkQueryPoolCreateInfo queryPoolCreateInfo =
938 VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, // VkStructureType sType;
939 nullptr, // const void* pNext;
940 0u, // VkQueryPoolCreateFlags flags;
941 VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT, // VkQueryType queryType;
942 viewCount, // uint32_t queryCount;
943 0u, // VkQueryPipelineStatisticFlags pipelineStatistics;
945 primitivesQueryPool = createQueryPool(vkd, device, &queryPoolCreateInfo);
946 allQueryPools.push_back(primitivesQueryPool.get());
949 const VkQueryPipelineStatisticFlags statQueryFlags =
950 ( (hasMeshInvStat ? VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT : 0)
951 | (hasTaskInvStat ? VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT : 0) );
955 const VkQueryPoolCreateInfo queryPoolCreateInfo =
957 VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, // VkStructureType sType;
958 nullptr, // const void* pNext;
959 0u, // VkQueryPoolCreateFlags flags;
960 VK_QUERY_TYPE_PIPELINE_STATISTICS, // VkQueryType queryType;
961 viewCount, // uint32_t queryCount;
962 statQueryFlags, // VkQueryPipelineStatisticFlags pipelineStatistics;
964 statsQueryPool = createQueryPool(vkd, device, &queryPoolCreateInfo);
965 allQueryPools.push_back(statsQueryPool.get());
968 // Some query result parameters.
969 const auto querySizesAndOffsets = m_params->getQuerySizesAndOffsets();
970 const size_t maxResultSize = k64sz * 10ull; // 10 items at most: (prim+avail+task+mesh+avail)*2.
971 const auto statsQueryOffsetSz = static_cast<size_t>(querySizesAndOffsets.statsQueryOffset);
973 // Create output buffer for the queries.
974 BufferWithMemoryPtr queryResultsBuffer;
975 if (m_params->access == AccessMethod::COPY)
977 const auto queryResultsBufferInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(maxResultSize), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
978 queryResultsBuffer = BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, queryResultsBufferInfo, MemoryRequirement::HostVisible));
980 std::vector<uint8_t> queryResultsHostVec(maxResultSize, 0);
982 const auto statsDataHostVecPtr = queryResultsHostVec.data() + statsQueryOffsetSz;
983 const auto statsRemainingSize = maxResultSize - statsQueryOffsetSz;
985 // Result flags when obtaining query results.
986 const auto queryResultFlags = m_params->getQueryResultFlags();
988 // Reset queries before use.
989 // Queries will be reset in a separate command buffer to make sure they are always properly reset before use.
990 // We could do this with VK_EXT_host_query_reset too.
992 beginCommandBuffer(vkd, resetCmdBuffer.get());
993 resetFirstQueries(resetCmdBuffer.get(), allQueryPools, viewCount);
994 endCommandBuffer(vkd, resetCmdBuffer.get());
995 submitCommandsAndWait(vkd, device, queue, resetCmdBuffer.get());
998 // Command recording.
999 beginCommandBuffer(vkd, cmdBuffer);
1001 if (m_params->useSecondary)
1003 const VkCommandBufferInheritanceInfo inheritanceInfo =
1005 VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, // VkStructureType sType;
1006 nullptr, // const void* pNext;
1007 renderPass.get(), // VkRenderPass renderPass;
1008 0u, // uint32_t subpass;
1009 framebuffer.get(), // VkFramebuffer framebuffer;
1010 VK_FALSE, // VkBool32 occlusionQueryEnable;
1011 0u, // VkQueryControlFlags queryFlags;
1012 (m_params->areQueriesInherited() ? statQueryFlags : 0u), // VkQueryPipelineStatisticFlags pipelineStatistics;
1015 const auto secCmdBufferFlags = (VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT | VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
1017 const VkCommandBufferBeginInfo secBeginInfo =
1019 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // VkStructureType sType;
1020 nullptr, // const void* pNext;
1021 secCmdBufferFlags, // VkCommandBufferUsageFlags flags;
1022 &inheritanceInfo, // const VkCommandBufferInheritanceInfo* pInheritanceInfo;
1025 VK_CHECK(vkd.beginCommandBuffer(secCmdBuffer, &secBeginInfo));
1028 const auto subpassContents = (m_params->useSecondary ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE);
1032 // * Only primary, inside render pass
1033 // * Only primary, outside render pass
1034 // * Primary and secondary, inside render pass (query in secondary)
1035 // * Primary and secondary, outside render pass (query inheritance)
1037 if (!m_params->useSecondary)
1039 if (m_params->insideRenderPass)
1041 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), clearColor, subpassContents);
1042 beginFirstQueries(cmdBuffer, allQueryPools);
1043 recordDraws(cmdBuffer, rawPipeline, rawPipeLayout);
1044 endFirstQueries(cmdBuffer, allQueryPools);
1045 endRenderPass(vkd, cmdBuffer);
1049 DE_ASSERT(!m_params->multiView);
1050 beginFirstQueries(cmdBuffer, allQueryPools);
1051 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), clearColor, subpassContents);
1052 recordDraws(cmdBuffer, rawPipeline, rawPipeLayout);
1053 endRenderPass(vkd, cmdBuffer);
1054 endFirstQueries(cmdBuffer, allQueryPools);
1059 if (m_params->insideRenderPass) // Queries in secondary command buffer.
1061 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), clearColor, subpassContents);
1062 beginFirstQueries(secCmdBuffer, allQueryPools);
1063 recordDraws(secCmdBuffer, rawPipeline, rawPipeLayout);
1064 endFirstQueries(secCmdBuffer, allQueryPools);
1065 endCommandBuffer(vkd, secCmdBuffer);
1066 vkd.cmdExecuteCommands(cmdBuffer, 1u, &secCmdBuffer);
1067 endRenderPass(vkd, cmdBuffer);
1069 else // Inherited queries case.
1071 DE_ASSERT(!m_params->multiView);
1072 beginFirstQueries(cmdBuffer, allQueryPools);
1073 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), clearColor, subpassContents);
1074 recordDraws(secCmdBuffer, rawPipeline, rawPipeLayout);
1075 endCommandBuffer(vkd, secCmdBuffer);
1076 vkd.cmdExecuteCommands(cmdBuffer, 1u, &secCmdBuffer);
1077 endRenderPass(vkd, cmdBuffer);
1078 endFirstQueries(cmdBuffer, allQueryPools);
1082 // Render to copy barrier.
1084 const auto preCopyImgBarrier = makeImageMemoryBarrier(VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorBuffer.get(), colorSRR);
1085 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preCopyImgBarrier);
1088 if (m_params->resetType == ResetCase::BEFORE_ACCESS)
1089 resetFirstQueries(cmdBuffer, allQueryPools, viewCount);
1091 if (m_params->access == AccessMethod::COPY)
1093 if (hasPrimitivesQuery)
1094 vkd.cmdCopyQueryPoolResults(cmdBuffer, primitivesQueryPool.get(), 0u, viewCount, queryResultsBuffer->get(), 0ull, querySizesAndOffsets.primitivesQuerySize, queryResultFlags);
1097 vkd.cmdCopyQueryPoolResults(cmdBuffer, statsQueryPool.get(), 0u, viewCount, queryResultsBuffer->get(), querySizesAndOffsets.statsQueryOffset, querySizesAndOffsets.statsQuerySize, queryResultFlags);
1100 if (m_params->resetType == ResetCase::AFTER_ACCESS)
1101 resetFirstQueries(cmdBuffer, allQueryPools, viewCount);
1103 // Copy color attachment to verification buffer.
1105 const auto copyRegion = makeBufferImageCopy(colorExtent, colorSRL);
1106 vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verifBuffer.get(), 1u, ©Region);
1109 // This barrier applies to both the color verification buffer and the queries if they were copied.
1110 const auto postCopyBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1111 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postCopyBarrier);
1113 endCommandBuffer(vkd, cmdBuffer);
1114 submitCommands(cmdBuffer);
1116 // When using GET, obtain results before actually waiting for the fence if possible. This way it's more interesting for cases
1117 // that do not use the wait bit.
1118 if (m_params->access == AccessMethod::GET)
1120 // When resetting queries before access, we need to make sure the reset operation has really taken place.
1121 if (m_params->resetType == ResetCase::BEFORE_ACCESS)
1124 const bool allowNotReady = !m_params->waitBit;
1126 if (hasPrimitivesQuery)
1128 const auto res = vkd.getQueryPoolResults(device, primitivesQueryPool.get(), 0u, viewCount, de::dataSize(queryResultsHostVec), queryResultsHostVec.data(), querySizesAndOffsets.primitivesQuerySize, queryResultFlags);
1129 checkGetQueryRes(res, allowNotReady);
1134 const auto res = vkd.getQueryPoolResults(device, statsQueryPool.get(), 0u, viewCount, statsRemainingSize, statsDataHostVecPtr, querySizesAndOffsets.statsQuerySize, queryResultFlags);
1135 checkGetQueryRes(res, allowNotReady);
1141 // Verify color buffer.
1143 auto& log = m_context.getTestContext().getLog();
1144 auto& verifBufferAlloc = verifBuffer.getAllocation();
1145 void* verifBufferData = verifBufferAlloc.getHostPtr();
1147 invalidateAlloc(vkd, device, verifBufferAlloc);
1149 tcu::ConstPixelBufferAccess verifAccess (colorTcuFormat, colorTcuExtent, verifBufferData);
1150 const tcu::Vec4 threshold (0.0f, 0.0f, 0.0f, 0.0f); // Results should be exact.
1152 for (int layer = 0; layer < colorTcuExtent.z(); ++layer)
1154 // This should match the fragment shader.
1155 const auto green = ((layer > 0) ? 1.0f : 0.0f);
1156 const auto referenceColor = ((m_params->getTotalDrawCount() > 0u) ? tcu::Vec4(0.0f, green, 1.0f, 1.0f) : clearColor);
1157 const auto layerAccess = tcu::getSubregion(verifAccess, 0, 0, layer, colorTcuExtent.x(), colorTcuExtent.y(), 1);
1159 if (!tcu::floatThresholdCompare(log, "Color Result", "", referenceColor, layerAccess, threshold, tcu::COMPARE_LOG_ON_ERROR))
1161 std::ostringstream msg;
1162 msg << "Color target mismatch at layer " << layer << "; check log for details";
1163 TCU_FAIL(msg.str());
1168 // Verify query results.
1170 const auto itemSize = querySizesAndOffsets.queryItemSize;
1171 uint8_t* resultsPtr = nullptr;
1173 if (m_params->access == AccessMethod::COPY)
1175 auto& queryResultsBufferAlloc = queryResultsBuffer->getAllocation();
1176 void* queryResultsBufferData = queryResultsBufferAlloc.getHostPtr();
1177 invalidateAlloc(vkd, device, queryResultsBufferAlloc);
1179 resultsPtr = reinterpret_cast<uint8_t*>(queryResultsBufferData);
1181 else if (m_params->access == AccessMethod::GET)
1183 resultsPtr = queryResultsHostVec.data();
1187 if (hasPrimitivesQuery)
1189 const std::string queryGroupName = "Primitive count";
1190 uint64_t totalPrimitiveCount = 0ull;
1192 for (uint32_t viewIndex = 0u; viewIndex < viewCount; ++viewIndex)
1194 const std::string queryName = queryGroupName + " for view " + std::to_string(viewIndex);
1195 const uint64_t primitiveCount = readFromPtrAndAdvance(&resultsPtr, itemSize);
1197 totalPrimitiveCount += primitiveCount;
1199 if (m_params->availabilityBit)
1200 readAndVerifyAvailabilityBit(&resultsPtr, itemSize, *m_params, queryName);
1203 verifyQueryCounter(totalPrimitiveCount, expectedPrims, expectedPrims * viewCount, *m_params, queryGroupName);
1208 const std::string queryGroupName = "Stats query";
1209 uint64_t totalTaskInvs = 0ull;
1210 uint64_t totalMeshInvs = 0ull;
1212 for (uint32_t viewIndex = 0u; viewIndex < viewCount; ++viewIndex)
1216 const uint64_t taskInvs = readFromPtrAndAdvance(&resultsPtr, itemSize);
1217 totalTaskInvs += taskInvs;
1222 const uint64_t meshInvs = readFromPtrAndAdvance(&resultsPtr, itemSize);
1223 totalMeshInvs += meshInvs;
1226 if (m_params->availabilityBit)
1228 const std::string queryName = queryGroupName + " for view " + std::to_string(viewIndex);
1229 readAndVerifyAvailabilityBit(&resultsPtr, itemSize, *m_params, queryGroupName);
1234 verifyQueryCounter(totalTaskInvs, expectedTaskInv, expectedTaskInv, *m_params, "Task invocations");
1237 verifyQueryCounter(totalMeshInvs, expectedMeshInv, expectedMeshInv * viewCount, *m_params, "Mesh invocations");
1241 if (m_params->resetType == ResetCase::NONE_WITH_HOST)
1243 // We'll reset the different queries that we used before and we'll retrieve results again with GET, forcing availability bit
1244 // and no wait bit. We'll verify availability bits are zero.
1245 uint8_t* resultsPtr = queryResultsHostVec.data();
1247 // New parameters, based on the existing ones, that match the behavior we expect below.
1248 TestParams postResetParams = *m_params;
1249 postResetParams.availabilityBit = true;
1250 postResetParams.waitBit = false;
1251 postResetParams.resetType = ResetCase::BEFORE_ACCESS;
1253 const auto postResetFlags = postResetParams.getQueryResultFlags();
1254 const auto newSizesAndOffsets = postResetParams.getQuerySizesAndOffsets();
1255 const auto newStatsQueryOffsetSz = static_cast<size_t>(newSizesAndOffsets.statsQueryOffset);
1256 const auto newStatsDataHostVecPtr = queryResultsHostVec.data() + newStatsQueryOffsetSz;
1257 const auto newStatsRemainingSize = maxResultSize - newStatsQueryOffsetSz;
1258 const auto itemSize = newSizesAndOffsets.queryItemSize;
1260 if (hasPrimitivesQuery)
1262 vkd.resetQueryPool(device, primitivesQueryPool.get(), 0u, viewCount);
1263 const auto res = vkd.getQueryPoolResults(device, primitivesQueryPool.get(), 0u, viewCount, de::dataSize(queryResultsHostVec), queryResultsHostVec.data(), newSizesAndOffsets.primitivesQuerySize, postResetFlags);
1264 checkGetQueryRes(res, true/*allowNotReady*/);
1269 vkd.resetQueryPool(device, statsQueryPool.get(), 0u, viewCount);
1270 const auto res = vkd.getQueryPoolResults(device, statsQueryPool.get(), 0u, viewCount, newStatsRemainingSize, newStatsDataHostVecPtr, newSizesAndOffsets.statsQuerySize, postResetFlags);
1271 checkGetQueryRes(res, true/*allowNotReady*/);
1274 if (hasPrimitivesQuery)
1276 for (uint32_t viewIndex = 0u; viewIndex < viewCount; ++viewIndex)
1278 const std::string queryName = "Post-reset primitive count for view " + std::to_string(viewIndex);
1279 const uint64_t primitiveCount = readFromPtrAndAdvance(&resultsPtr, itemSize);
1281 // Resetting a query without beginning it again makes numerical results undefined.
1282 //verifyQueryCounter(primitiveCount, 0ull, postResetParams, queryName);
1283 DE_UNREF(primitiveCount);
1284 readAndVerifyAvailabilityBit(&resultsPtr, itemSize, postResetParams, queryName);
1290 for (uint32_t viewIndex = 0u; viewIndex < viewCount; ++viewIndex)
1294 const uint64_t taskInvs = readFromPtrAndAdvance(&resultsPtr, itemSize);
1295 // Resetting a query without beginning it again makes numerical results undefined.
1296 //verifyQueryCounter(taskInvs, 0ull, postResetParams, "Post-reset task invocations");
1302 const uint64_t meshInvs = readFromPtrAndAdvance(&resultsPtr, itemSize);
1303 // Resetting a query without beginning it again makes numerical results undefined.
1304 //verifyQueryCounter(meshInvs, 0ull, postResetParams, "Post-reset mesh invocations");
1308 const std::string queryName = "Post-reset stats query for view " + std::to_string(viewIndex);
1309 readAndVerifyAvailabilityBit(&resultsPtr, itemSize, postResetParams, queryName);
1314 return tcu::TestStatus::pass("Pass");
// Smart pointer alias (de::MovePtr) for test case groups. It is used below to hold every group
// while it is being filled, until the group is released and added to its parent.
1317 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
// Creates the "query" test group ("Mesh Shader Query Tests") and fills it with MeshQueryCase
// instances, one for each accepted combination of the parameter tables declared below.
//
// Groups are nested in this order, outermost first: query combination, geometry, reset type,
// access method, wait flag, draw call type, result size, availability flag, draw blocks,
// task shader usage, query ordering, view count; the command buffer type names the leaf test.
//
// testCtx: test context handed to every group and test case that gets created.
// Returns the raw pointer released from the top-level group.
// NOTE(review): the caller presumably takes ownership of the returned group -- confirm.
1321 tcu::TestCaseGroup* createMeshShaderQueryTestsEXT (tcu::TestContext& testCtx)
1323 GroupPtr queryGroup (new tcu::TestCaseGroup(testCtx, "query", "Mesh Shader Query Tests"));
// Sets of query types used together in a test (copied to params.queryTypes); the name labels
// the outermost subgroup.
1327 std::vector<QueryType> queryTypes;
1329 } queryCombinations[] =
1331 { { QueryType::PRIMITIVES }, "prim_query" },
1332 { { QueryType::TASK_INVOCATIONS }, "task_invs_query" },
1333 { { QueryType::MESH_INVOCATIONS }, "mesh_invs_query" },
1334 { { QueryType::TASK_INVOCATIONS, QueryType::MESH_INVOCATIONS }, "all_stats_query" },
1335 { { QueryType::PRIMITIVES, QueryType::TASK_INVOCATIONS, QueryType::MESH_INVOCATIONS }, "all_queries" },
// Draw call variants (copied to params.drawCall): direct, indirect and indirect with count.
1340 DrawCallType drawCallType;
1344 { DrawCallType::DIRECT, "draw" },
1345 { DrawCallType::INDIRECT, "indirect_draw" },
1346 { DrawCallType::INDIRECT_WITH_COUNT, "indirect_with_count_draw" },
// Draw block lists (copied to params.drawBlocks): empty, one entry or three entries.
// NOTE(review): the values look like per-block draw counts -- confirm against TestParams.
1351 std::vector<uint32_t> drawBlocks;
1355 { {}, "no_blocks" },
1356 { {10u}, "single_block" },
1357 { {10u, 20u, 30u}, "multiple_blocks" },
// Query reset variants (copied to params.resetType); see ResetCase.
1362 ResetCase resetCase;
1366 { ResetCase::NONE, "no_reset" },
1367 { ResetCase::NONE_WITH_HOST, "host_reset" },
1368 { ResetCase::BEFORE_ACCESS, "reset_before" },
1369 { ResetCase::AFTER_ACCESS, "reset_after" },
// Result access methods (copied to params.access); see AccessMethod.
1374 AccessMethod accessMethod;
1378 { AccessMethod::COPY, "copy" },
1379 { AccessMethod::GET, "get" },
// Geometry types (copied to params.geometry).
1384 GeometryType geometry;
1388 { GeometryType::POINTS, "points" },
1389 { GeometryType::LINES, "lines" },
1390 { GeometryType::TRIANGLES, "triangles" },
// Availability flag variants (copied to params.availabilityBit).
1405 bool availabilityFlag;
1407 } availabilityCases[] =
1409 { false, "no_availability" },
1410 { true, "with_availability" },
// Wait flag variants (copied to params.waitBit).
1419 { false, "no_wait" },
// Task shader usage variants (copied to params.useTaskShader).
1427 } taskShaderCases[] =
1429 { false, "mesh_only" },
1430 { true, "task_mesh" },
// Query ordering variants (copied to params.insideRenderPass).
1435 bool insideRenderPass;
1439 { false, "include_rp" },
1440 { true, "inside_rp" },
// Single view vs. multiview variants (copied to params.multiView).
1447 } multiViewCases[] =
1449 { false, "single_view" },
1450 { true, "multi_view" },
// Command buffer usage variants (copied to params.useSecondary).
1457 } cmdBufferTypes[] =
1459 { false, "only_primary" },
1460 { true, "with_secondary" },
// Build the group hierarchy: one nested loop per table, each level creating its own subgroup.
1463 for (const auto& queryCombination : queryCombinations)
1465 const bool hasPrimitivesQuery = de::contains(queryCombination.queryTypes.begin(), queryCombination.queryTypes.end(), QueryType::PRIMITIVES);
1467 GroupPtr queryCombinationGroup (new tcu::TestCaseGroup(testCtx, queryCombination.name, ""));
1469 for (const auto& geometryCase : geometryCases)
// Several of the dimensions below are pruned when the geometry is not triangles.
1471 const bool nonTriangles = (geometryCase.geometry != GeometryType::TRIANGLES);
1473 // For cases without primitive queries, skip non-triangle geometries.
1474 if (!hasPrimitivesQuery && nonTriangles)
1477 GroupPtr geometryCaseGroup (new tcu::TestCaseGroup(testCtx, geometryCase.name, ""));
1479 for (const auto& resetType : resetTypes)
1481 GroupPtr resetTypeGroup (new tcu::TestCaseGroup(testCtx, resetType.name, ""));
1483 for (const auto& accessMethod : accessMethods)
1485 // Get + reset after access is not a valid combination (queries will be accessed after submission).
1486 if (accessMethod.accessMethod == AccessMethod::GET && resetType.resetCase == ResetCase::AFTER_ACCESS)
1489 GroupPtr accessMethodGroup (new tcu::TestCaseGroup(testCtx, accessMethod.name, ""));
1491 for (const auto& waitCase : waitCases)
1493 // Wait and reset before access is not valid (the query would never finish).
1494 if (resetType.resetCase == ResetCase::BEFORE_ACCESS && waitCase.waitFlag)
1497 GroupPtr waitCaseGroup (new tcu::TestCaseGroup(testCtx, waitCase.name, ""));
1499 for (const auto& drawCall : drawCalls)
1501 // Explicitly remove some combinations with non-triangles, just to reduce the number of tests.
1502 if (drawCall.drawCallType != DrawCallType::DIRECT && nonTriangles)
1505 GroupPtr drawCallGroup (new tcu::TestCaseGroup(testCtx, drawCall.name, ""));
1507 for (const auto& resultSize : resultSizes)
1509 // Explicitly remove some combinations with non-triangles, just to reduce the number of tests.
1510 if (resultSize.use64Bits && nonTriangles)
1513 GroupPtr resultSizeGroup (new tcu::TestCaseGroup(testCtx, resultSize.name, ""));
1515 for (const auto& availabilityCase : availabilityCases)
1517 // Explicitly remove some combinations with non-triangles, just to reduce the number of tests.
1518 if (availabilityCase.availabilityFlag && nonTriangles)
1521 GroupPtr availabilityCaseGroup (new tcu::TestCaseGroup(testCtx, availabilityCase.name, ""));
1523 for (const auto& blockCase : blockCases)
1525 // Explicitly remove some combinations with non-triangles, just to reduce the number of tests.
1526 if (blockCase.drawBlocks.size() <= 1 && nonTriangles)
1529 GroupPtr blockCaseGroup (new tcu::TestCaseGroup(testCtx, blockCase.name, ""));
1531 for (const auto& taskShaderCase : taskShaderCases)
1533 GroupPtr taskShaderCaseGroup (new tcu::TestCaseGroup(testCtx, taskShaderCase.name, ""));
1535 for (const auto& orderingCase : orderingCases)
1537 GroupPtr orderingCaseGroup (new tcu::TestCaseGroup(testCtx, orderingCase.name, ""));
1539 for (const auto& multiViewCase : multiViewCases)
// Multiview is only combined with the ordering case that has insideRenderPass set.
1541 if (multiViewCase.multiView && !orderingCase.insideRenderPass)
1544 GroupPtr multiViewGroup (new tcu::TestCaseGroup(testCtx, multiViewCase.name, ""));
// Innermost dimension: one test case per command buffer type, named after it.
1546 for (const auto& cmdBufferType : cmdBufferTypes)
// Gather the value selected at every nesting level into the test parameters.
1549 params.queryTypes = queryCombination.queryTypes;
1550 params.drawBlocks = blockCase.drawBlocks;
1551 params.drawCall = drawCall.drawCallType;
1552 params.geometry = geometryCase.geometry;
1553 params.resetType = resetType.resetCase;
1554 params.access = accessMethod.accessMethod;
1555 params.use64Bits = resultSize.use64Bits;
1556 params.availabilityBit = availabilityCase.availabilityFlag;
1557 params.waitBit = waitCase.waitFlag;
1558 params.useTaskShader = taskShaderCase.taskShader;
1559 params.insideRenderPass = orderingCase.insideRenderPass;
1560 params.useSecondary = cmdBufferType.useSecondary;
1561 params.multiView = multiViewCase.multiView;
1563 multiViewGroup->addChild(new MeshQueryCase(testCtx, cmdBufferType.name, "", std::move(params)));
// Attach every finished group to its parent, innermost first, passing the released raw pointer.
1566 orderingCaseGroup->addChild(multiViewGroup.release());
1569 taskShaderCaseGroup->addChild(orderingCaseGroup.release());
1572 blockCaseGroup->addChild(taskShaderCaseGroup.release());
1575 availabilityCaseGroup->addChild(blockCaseGroup.release());
1578 resultSizeGroup->addChild(availabilityCaseGroup.release());
1581 drawCallGroup->addChild(resultSizeGroup.release());
1584 waitCaseGroup->addChild(drawCallGroup.release());
1587 accessMethodGroup->addChild(waitCaseGroup.release());
1590 resetTypeGroup->addChild(accessMethodGroup.release());
1593 geometryCaseGroup->addChild(resetTypeGroup.release());
1596 queryCombinationGroup->addChild(geometryCaseGroup.release());
1599 queryGroup->addChild(queryCombinationGroup.release());
1602 return queryGroup.release();