Merge vk-gl-cts/master into vk-gl-cts/vulkan-cts-next-dev
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / query_pool / vktQueryPoolPerformanceTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2018 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Vulkan Performance Query Tests
22  *//*--------------------------------------------------------------------*/
23
24 #include "vktQueryPoolPerformanceTests.hpp"
25 #include "vktTestCase.hpp"
26
27 #include "vktDrawImageObjectUtil.hpp"
28 #include "vktDrawBufferObjectUtil.hpp"
29 #include "vktDrawCreateInfoUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkQueryUtil.hpp"
36
37 #include "deMath.h"
38
39 #include "tcuTestLog.hpp"
40 #include "tcuResource.hpp"
41 #include "tcuImageCompare.hpp"
42 #include "vkImageUtil.hpp"
43 #include "tcuCommandLine.hpp"
44 #include "tcuRGBA.hpp"
45
46 namespace vkt
47 {
48 namespace QueryPool
49 {
50 namespace
51 {
52
53 using namespace vk;
54 using namespace Draw;
55
56 std::string uuidToHex(const deUint8 uuid[])
57 {
58         const size_t    bytesPerPart[]  = {4, 2, 2, 2, 6};
59         const deUint8*  ptr                             = &uuid[0];
60         const size_t    stringSize              = VK_UUID_SIZE * 2 + DE_LENGTH_OF_ARRAY(bytesPerPart) - 1;
61         std::string             result;
62
63         result.reserve(stringSize);
64
65         for (size_t partNdx = 0; partNdx < DE_LENGTH_OF_ARRAY(bytesPerPart); ++partNdx)
66         {
67                 const size_t    bytesInPart             = bytesPerPart[partNdx];
68                 const size_t    symbolsInPart   = 2 * bytesInPart;
69                 deUint64                part                    = 0;
70                 std::string             partString;
71
72                 for (size_t byteInPartNdx = 0; byteInPartNdx < bytesInPart; ++byteInPartNdx)
73                 {
74                         part = (part << 8) | *ptr;
75                         ++ptr;
76                 }
77
78                 partString      = tcu::toHex(part).toString();
79
80                 DE_ASSERT(partString.size() > symbolsInPart);
81
82                 result += (symbolsInPart >= partString.size()) ? partString : partString.substr(partString.size() - symbolsInPart);
83
84                 if (partNdx + 1 != DE_LENGTH_OF_ARRAY(bytesPerPart))
85                         result += '-';
86         }
87
88         DE_ASSERT(ptr == &uuid[VK_UUID_SIZE]);
89         DE_ASSERT(result.size() == stringSize);
90
91         return result;
92 }
93
94 class EnumerateAndValidateTest : public TestInstance
95 {
96 public:
97                                                 EnumerateAndValidateTest                (vkt::Context&  context, VkQueueFlagBits queueFlagBits);
98         tcu::TestStatus         iterate                                                 (void);
99
100 protected:
101         void                            basicValidateCounter                    (const deUint32 familyIndex);
102
103 private:
104         VkQueueFlagBits         m_queueFlagBits;
105         bool                            m_requiredExtensionsPresent;
106 };
107
108 EnumerateAndValidateTest::EnumerateAndValidateTest (vkt::Context& context, VkQueueFlagBits queueFlagBits)
109         : TestInstance(context)
110         , m_queueFlagBits(queueFlagBits)
111         , m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
112 {
113 }
114
115 tcu::TestStatus EnumerateAndValidateTest::iterate (void)
116 {
117         const InstanceInterface&                                        vki                             = m_context.getInstanceInterface();
118         const VkPhysicalDevice                                          physicalDevice  = m_context.getPhysicalDevice();
119         const std::vector<VkQueueFamilyProperties>      queueProperties = getPhysicalDeviceQueueFamilyProperties(vki, physicalDevice);
120
121         for (deUint32 queueNdx = 0; queueNdx < queueProperties.size(); queueNdx++)
122         {
123                 if ((queueProperties[queueNdx].queueFlags & m_queueFlagBits) == 0)
124                         continue;
125
126                 deUint32 counterCount = 0;
127                 VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCount, DE_NULL, DE_NULL));
128
129                 if (counterCount == 0)
130                         continue;
131
132                 {
133                         std::vector<VkPerformanceCounterKHR>    counters                        (counterCount);
134                         deUint32                                                                counterCountRead        = counterCount;
135                         std::map<std::string, size_t>                   uuidValidator;
136
137                         if (counterCount > 1)
138                         {
139                                 deUint32        incompleteCounterCount  = counterCount - 1;
140                                 VkResult        result;
141
142                                 result = vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &incompleteCounterCount, &counters[0], DE_NULL);
143                                 if (result != VK_INCOMPLETE)
144                                         TCU_FAIL("VK_INCOMPLETE not returned");
145                         }
146
147                         VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCountRead, &counters[0], DE_NULL));
148
149                         if (counterCountRead != counterCount)
150                                 TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) + ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");
151
152                         for (size_t counterNdx = 0; counterNdx < counters.size(); ++counterNdx)
153                         {
154                                 const VkPerformanceCounterKHR&  counter                 = counters[counterNdx];
155                                 const std::string                               uuidStr                 = uuidToHex(counter.uuid);
156
157                                 if (uuidValidator.find(uuidStr) != uuidValidator.end())
158                                         TCU_FAIL("Duplicate counter UUID detected " + uuidStr);
159                                 else
160                                         uuidValidator[uuidStr] = counterNdx;
161
162                                 if (counter.scope >= VK_PERFORMANCE_COUNTER_SCOPE_KHR_LAST)
163                                         TCU_FAIL("Counter scope is invalid " + de::toString(static_cast<size_t>(counter.scope)));
164
165                                 if (counter.storage >= VK_PERFORMANCE_COUNTER_STORAGE_KHR_LAST)
166                                         TCU_FAIL("Counter storage is invalid " + de::toString(static_cast<size_t>(counter.storage)));
167
168                                 if (counter.unit >= VK_PERFORMANCE_COUNTER_UNIT_KHR_LAST)
169                                         TCU_FAIL("Counter unit is invalid " + de::toString(static_cast<size_t>(counter.unit)));
170                         }
171                 }
172                 {
173                         std::vector<VkPerformanceCounterDescriptionKHR> counterDescriptors      (counterCount);
174                         deUint32                                                                                counterCountRead        = counterCount;
175
176                         VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCountRead, DE_NULL, &counterDescriptors[0]));
177
178                         if (counterCountRead != counterCount)
179                                 TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) + ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");
180
181                         for (size_t counterNdx = 0; counterNdx < counterDescriptors.size(); ++counterNdx)
182                         {
183                                 const VkPerformanceCounterDescriptionKHR&               counterDescriptor       = counterDescriptors[counterNdx];
184                                 const VkPerformanceCounterDescriptionFlagsKHR   allowedFlags            = VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_KHR
185                                                                                                                                                                         | VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_KHR;
186
187                                 if ((counterDescriptor.flags & ~allowedFlags) != 0)
188                                         TCU_FAIL("Invalid flags present in VkPerformanceCounterDescriptionFlagsKHR");
189                         }
190                 }
191         }
192
193         return tcu::TestStatus::pass("Pass");
194 }
195
196 class QueryTestBase : public TestInstance
197 {
198 public:
199                                                 QueryTestBase   (vkt::Context&  context);
200
201 protected:
202
203         void                            setupCounters                   (void);
204         Move<VkQueryPool>       createQueryPool                 (deUint32 enabledCounterOffset, deUint32 enabledCounterStride);
205         bool                            acquireProfilingLock    (void);
206         void                            releaseProfilingLock    (void);
207         bool                            verifyQueryResults              (VkQueryPool queryPool);
208         deUint32                        getRequiredNumerOfPasses(void);
209
210 private:
211
212         bool                                                                    m_requiredExtensionsPresent;
213         deUint32                                                                m_requiredNumerOfPasses;
214         std::map<deUint64, deUint32>                    m_enabledCountersCountMap;              // number of counters that were enabled per query pool
215         std::vector<VkPerformanceCounterKHR>    m_counters;                                             // counters provided by the device
216 };
217
218 QueryTestBase::QueryTestBase(vkt::Context& context)
219         : TestInstance  (context)
220         , m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
221         , m_requiredNumerOfPasses(0)
222 {
223 }
224
225 void QueryTestBase::setupCounters()
226 {
227         const InstanceInterface&        vki                                     = m_context.getInstanceInterface();
228         const VkPhysicalDevice          physicalDevice          = m_context.getPhysicalDevice();
229         const CmdPoolCreateInfo         cmdPoolCreateInfo       = m_context.getUniversalQueueFamilyIndex();
230         deUint32                                        queueFamilyIndex        = cmdPoolCreateInfo.queueFamilyIndex;
231         deUint32                                        counterCount;
232
233         if (!m_context.getPerformanceQueryFeatures().performanceCounterQueryPools)
234                 TCU_THROW(NotSupportedError, "Performance counter query pools feature not supported");
235
236         // get the number of supported counters
237         VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueFamilyIndex, &counterCount, NULL, NULL));
238
239         if (!counterCount)
240                 TCU_THROW(NotSupportedError, "QualityWarning: there are no performance counters");
241
242         // get supported counters
243         m_counters.resize(counterCount);
244         VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueFamilyIndex, &counterCount, &m_counters[0], DE_NULL));
245 }
246
247 Move<VkQueryPool> QueryTestBase::createQueryPool(deUint32 enabledCounterOffset, deUint32 enabledCounterStride)
248 {
249         const InstanceInterface&        vki                                     = m_context.getInstanceInterface();
250         const DeviceInterface&          vkd                                     = m_context.getDeviceInterface();
251         const VkPhysicalDevice          physicalDevice          = m_context.getPhysicalDevice();
252         const VkDevice                          device                          = m_context.getDevice();
253         const CmdPoolCreateInfo         cmdPoolCreateInfo       = m_context.getUniversalQueueFamilyIndex();
254         const deUint32                          counterCount            = (deUint32)m_counters.size();
255         deUint32                                        enabledIndex            = enabledCounterOffset ? 0 : enabledCounterStride;
256         std::vector<deUint32>           enabledCounters;
257
258         // enable every <enabledCounterStride> counter that has command or render pass scope
259         for (deUint32 i = 0; i < counterCount; i++)
260         {
261                 // handle offset
262                 if (enabledCounterOffset)
263                 {
264                         if (enabledCounterOffset == enabledIndex)
265                         {
266                                 // disable handling offset
267                                 enabledCounterOffset = 0;
268
269                                 // eneble next index in stride condition
270                                 enabledIndex = enabledCounterStride;
271                         }
272                         else
273                         {
274                                 ++enabledIndex;
275                                 continue;
276                         }
277                 }
278
279                 // handle stride
280                 if (enabledIndex == enabledCounterStride)
281                 {
282                         enabledCounters.push_back(i);
283                         enabledIndex = 0;
284                 }
285                 else
286                         ++enabledIndex;
287         }
288
289         // get number of counters that were enabled for this query pool
290         deUint32 enabledCountersCount = static_cast<deUint32>(enabledCounters.size());
291         if (!enabledCountersCount)
292                 TCU_THROW(NotSupportedError, "QualityWarning: no performance counters");
293
294         // define performance query
295         VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo =
296         {
297                 VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR,
298                 NULL,
299                 cmdPoolCreateInfo.queueFamilyIndex,                     // queue family that this performance query is performed on
300                 enabledCountersCount,                                           // number of counters to enable
301                 &enabledCounters[0]                                                     // array of indices of counters to enable
302         };
303
304         // get the number of passes counters will require
305         vki.getPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(physicalDevice, &performanceQueryCreateInfo, &m_requiredNumerOfPasses);
306
307         // create query pool
308         VkQueryPoolCreateInfo queryPoolCreateInfo =
309         {
310                 VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
311                 &performanceQueryCreateInfo,
312                 0,                                                                                      // flags
313                 VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR,            // new query type
314                 1,                                                                                      // queryCount
315                 0
316         };
317
318         Move<VkQueryPool> queryPool = vk::createQueryPool(vkd, device, &queryPoolCreateInfo);
319
320         // memorize number of enabled counters for this query pool
321         m_enabledCountersCountMap[queryPool.get().getInternal()] = enabledCountersCount;
322
323         return queryPool;
324 }
325
326 bool QueryTestBase::acquireProfilingLock()
327 {
328         const DeviceInterface&          vkd             = m_context.getDeviceInterface();
329         const VkDevice                          device  = m_context.getDevice();
330
331         // acquire profiling lock before we record command buffers
332         VkAcquireProfilingLockInfoKHR lockInfo =
333         {
334                 VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
335                 NULL,
336                 0,
337                 2000000000ull                                   // wait 2s for the lock
338         };
339
340         VkResult result = vkd.acquireProfilingLockKHR(device, &lockInfo);
341         if (result == VK_TIMEOUT)
342         {
343                 m_context.getTestContext().getLog() << tcu::TestLog::Message
344                         << "Timeout reached, profiling lock wasn't acquired - test had to end earlier"
345                         << tcu::TestLog::EndMessage;
346                 return false;
347         }
348         if (result != VK_SUCCESS)
349                 TCU_FAIL("Profiling lock wasn't acquired");
350
351         return true;
352 }
353
354 void QueryTestBase::releaseProfilingLock()
355 {
356         const DeviceInterface&  vkd             = m_context.getDeviceInterface();
357         const VkDevice                  device  = m_context.getDevice();
358
359         // release the profiling lock after the command buffer is no longer in the pending state
360         vkd.releaseProfilingLockKHR(device);
361 }
362
363 bool QueryTestBase::verifyQueryResults(VkQueryPool queryPool)
364 {
365         const DeviceInterface&          vkd             = m_context.getDeviceInterface();
366         const VkDevice                          device  = m_context.getDevice();
367
368         // create an array to hold the results of all counters
369         deUint32 enabledCounterCount = m_enabledCountersCountMap[queryPool.getInternal()];
370         std::vector<VkPerformanceCounterResultKHR> recordedCounters(enabledCounterCount);
371
372         // verify that query result can be retrieved
373         VkResult result = vkd.getQueryPoolResults(device, queryPool, 0, 1, sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount,
374                 &recordedCounters[0], sizeof(VkPerformanceCounterResultKHR), VK_QUERY_RESULT_WAIT_BIT);
375         if (result == VK_NOT_READY)
376         {
377                 m_context.getTestContext().getLog() << tcu::TestLog::Message
378                         << "Pass but result is not ready"
379                         << tcu::TestLog::EndMessage;
380                 return true;
381         }
382         return (result == VK_SUCCESS);
383 }
384
385 deUint32 QueryTestBase::getRequiredNumerOfPasses()
386 {
387         return m_requiredNumerOfPasses;
388 }
389
390 // Base class for all graphic tests
391 class GraphicQueryTestBase : public QueryTestBase
392 {
393 public:
394         GraphicQueryTestBase(vkt::Context&      context);
395
396 protected:
397         void initStateObjects(void);
398
399 protected:
400         Move<VkPipeline>                m_pipeline;
401         Move<VkPipelineLayout>  m_pipelineLayout;
402
403         de::SharedPtr<Image>    m_colorAttachmentImage;
404         Move<VkImageView>               m_attachmentView;
405
406         Move<VkRenderPass>              m_renderPass;
407         Move<VkFramebuffer>             m_framebuffer;
408
409         de::SharedPtr<Buffer>   m_vertexBuffer;
410
411         VkFormat                                m_colorAttachmentFormat;
412         deUint32                                m_size;
413 };
414
415 GraphicQueryTestBase::GraphicQueryTestBase(vkt::Context& context)
416         : QueryTestBase(context)
417         , m_colorAttachmentFormat(VK_FORMAT_R8G8B8A8_UNORM)
418         , m_size(32)
419 {
420 }
421
422 void GraphicQueryTestBase::initStateObjects(void)
423 {
424         const VkDevice                          device  = m_context.getDevice();
425         const DeviceInterface&          vkd             = m_context.getDeviceInterface();
426
427         //attachment images and views
428         {
429                 VkExtent3D imageExtent =
430                 {
431                         m_size,         // width
432                         m_size,         // height
433                         1                       // depth
434                 };
435
436                 const ImageCreateInfo colorImageCreateInfo(VK_IMAGE_TYPE_2D, m_colorAttachmentFormat, imageExtent, 1, 1,
437                                                                                                    VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_TILING_OPTIMAL,
438                                                                                                    VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
439
440                 m_colorAttachmentImage = Image::createAndAlloc(vkd, device, colorImageCreateInfo, m_context.getDefaultAllocator(),
441                                                                                                            m_context.getUniversalQueueFamilyIndex());
442
443                 const ImageViewCreateInfo attachmentViewInfo(m_colorAttachmentImage->object(), VK_IMAGE_VIEW_TYPE_2D, m_colorAttachmentFormat);
444                 m_attachmentView = createImageView(vkd, device, &attachmentViewInfo);
445         }
446
447         // renderpass and framebuffer
448         {
449                 RenderPassCreateInfo renderPassCreateInfo;
450                 renderPassCreateInfo.addAttachment(AttachmentDescription(m_colorAttachmentFormat,                               // format
451                                                                                                                                  VK_SAMPLE_COUNT_1_BIT,                                 // samples
452                                                                                                                                  VK_ATTACHMENT_LOAD_OP_CLEAR,                   // loadOp
453                                                                                                                                  VK_ATTACHMENT_STORE_OP_DONT_CARE,              // storeOp
454                                                                                                                                  VK_ATTACHMENT_LOAD_OP_DONT_CARE,               // stencilLoadOp
455                                                                                                                                  VK_ATTACHMENT_STORE_OP_DONT_CARE,              // stencilLoadOp
456                                                                                                                                  VK_IMAGE_LAYOUT_GENERAL,                               // initialLauout
457                                                                                                                                  VK_IMAGE_LAYOUT_GENERAL));                             // finalLayout
458
459                 const VkAttachmentReference colorAttachmentReference =
460                 {
461                         0,                                                                                                                                                                                      // attachment
462                         VK_IMAGE_LAYOUT_GENERAL                                                                                                                                         // layout
463                 };
464
465                 renderPassCreateInfo.addSubpass(SubpassDescription(VK_PIPELINE_BIND_POINT_GRAPHICS,                             // pipelineBindPoint
466                                                                                                                    0,                                                                                   // flags
467                                                                                                                    0,                                                                                   // inputCount
468                                                                                                                    DE_NULL,                                                                             // pInputAttachments
469                                                                                                                    1,                                                                                   // colorCount
470                                                                                                                    &colorAttachmentReference,                                   // pColorAttachments
471                                                                                                                    DE_NULL,                                                                             // pResolveAttachments
472                                                                                                                    AttachmentReference(),                                               // depthStencilAttachment
473                                                                                                                    0,                                                                                   // preserveCount
474                                                                                                                    DE_NULL));                                                                   // preserveAttachments
475
476                 m_renderPass = createRenderPass(vkd, device, &renderPassCreateInfo);
477
478                 std::vector<VkImageView> attachments(1);
479                 attachments[0] = *m_attachmentView;
480
481                 FramebufferCreateInfo framebufferCreateInfo(*m_renderPass, attachments, m_size, m_size, 1);
482                 m_framebuffer = createFramebuffer(vkd, device, &framebufferCreateInfo);
483         }
484
485         // pipeline
486         {
487                 Unique<VkShaderModule> vs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("vert"), 0));
488                 Unique<VkShaderModule> fs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("frag"), 0));
489
490                 const PipelineCreateInfo::ColorBlendState::Attachment attachmentState;
491
492                 const PipelineLayoutCreateInfo pipelineLayoutCreateInfo;
493                 m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutCreateInfo);
494
495                 const VkVertexInputBindingDescription vf_binding_desc =
496                 {
497                         0,                                                                                                                              // binding
498                         4 * (deUint32)sizeof(float),                                                                    // stride
499                         VK_VERTEX_INPUT_RATE_VERTEX                                                                             // inputRate
500                 };
501
502                 const VkVertexInputAttributeDescription vf_attribute_desc =
503                 {
504                         0,                                                                                                                              // location
505                         0,                                                                                                                              // binding
506                         VK_FORMAT_R32G32B32A32_SFLOAT,                                                                  // format
507                         0                                                                                                                               // offset
508                 };
509
510                 const VkPipelineVertexInputStateCreateInfo vf_info =
511                 {
512                         VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,              // sType
513                         NULL,                                                                                                                   // pNext
514                         0u,                                                                                                                             // flags
515                         1,                                                                                                                              // vertexBindingDescriptionCount
516                         &vf_binding_desc,                                                                                               // pVertexBindingDescriptions
517                         1,                                                                                                                              // vertexAttributeDescriptionCount
518                         &vf_attribute_desc                                                                                              // pVertexAttributeDescriptions
519                 };
520
521                 PipelineCreateInfo pipelineCreateInfo(*m_pipelineLayout, *m_renderPass, 0, 0);
522                 pipelineCreateInfo.addShader(PipelineCreateInfo::PipelineShaderStage(*vs, "main", VK_SHADER_STAGE_VERTEX_BIT));
523                 pipelineCreateInfo.addShader(PipelineCreateInfo::PipelineShaderStage(*fs, "main", VK_SHADER_STAGE_FRAGMENT_BIT));
524                 pipelineCreateInfo.addState(PipelineCreateInfo::InputAssemblerState(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST));
525                 pipelineCreateInfo.addState(PipelineCreateInfo::ColorBlendState(1, &attachmentState));
526                 const VkViewport viewport       = makeViewport(m_size, m_size);
527                 const VkRect2D scissor          = makeRect2D(m_size, m_size);
528                 pipelineCreateInfo.addState(PipelineCreateInfo::ViewportState(1, std::vector<VkViewport>(1, viewport), std::vector<VkRect2D>(1, scissor)));
529                 pipelineCreateInfo.addState(PipelineCreateInfo::DepthStencilState(false, false, VK_COMPARE_OP_GREATER_OR_EQUAL));
530                 pipelineCreateInfo.addState(PipelineCreateInfo::RasterizerState());
531                 pipelineCreateInfo.addState(PipelineCreateInfo::MultiSampleState());
532                 pipelineCreateInfo.addState(vf_info);
533                 m_pipeline = createGraphicsPipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
534         }
535
536         // vertex buffer
537         {
538                 std::vector<tcu::Vec4> vertices(3);
539                 vertices[0] = tcu::Vec4(0.5, 0.5, 0.0, 1.0);
540                 vertices[1] = tcu::Vec4(0.5, 0.0, 0.0, 1.0);
541                 vertices[2] = tcu::Vec4(0.0, 0.5, 0.0, 1.0);
542
543                 const size_t kBufferSize = vertices.size() * sizeof(tcu::Vec4);
544                 m_vertexBuffer = Buffer::createAndAlloc(vkd, device, BufferCreateInfo(kBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT), m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);
545
546                 tcu::Vec4 *ptr = reinterpret_cast<tcu::Vec4*>(m_vertexBuffer->getBoundMemory().getHostPtr());
547                 deMemcpy(ptr, &vertices[0], kBufferSize);
548
549                 flushMappedMemoryRange(vkd, device,     m_vertexBuffer->getBoundMemory().getMemory(), m_vertexBuffer->getBoundMemory().getOffset(), VK_WHOLE_SIZE);
550         }
551 }
552
553
554 class GraphicQueryTest : public GraphicQueryTestBase
555 {
556 public:
557                                                 GraphicQueryTest        (vkt::Context&  context);
558         tcu::TestStatus         iterate                         (void);
559 };
560
561 GraphicQueryTest::GraphicQueryTest(vkt::Context& context)
562         : GraphicQueryTestBase(context)
563 {
564 }
565
566 tcu::TestStatus GraphicQueryTest::iterate(void)
567 {
568         const DeviceInterface&          vkd                                     = m_context.getDeviceInterface();
569         const VkDevice                          device                          = m_context.getDevice();
570         const VkQueue                           queue                           = m_context.getUniversalQueue();
571         const CmdPoolCreateInfo         cmdPoolCreateInfo       = m_context.getUniversalQueueFamilyIndex();
572         Unique<VkCommandPool>           cmdPool                         (createCommandPool(vkd, device, &cmdPoolCreateInfo));
573         Unique<VkCommandBuffer>         cmdBuffer                       (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
574
575         initStateObjects();
576         setupCounters();
577
578         vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));
579
580         if (!acquireProfilingLock())
581         {
582                 // lock was not acquired in given time, we can't fail the test
583                 return tcu::TestStatus::pass("Pass");
584         }
585
586         // reset query pool
587         {
588                 Unique<VkCommandBuffer>         resetCmdBuffer  (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
589                 const Unique<VkFence>           fence                   (createFence(vkd, device));
590                 const VkSubmitInfo                      submitInfo              =
591                 {
592                         VK_STRUCTURE_TYPE_SUBMIT_INFO,                                          // sType
593                         DE_NULL,                                                                                        // pNext
594                         0u,                                                                                                     // waitSemaphoreCount
595                         DE_NULL,                                                                                        // pWaitSemaphores
596                         (const VkPipelineStageFlags*)DE_NULL,                           // pWaitDstStageMask
597                         1u,                                                                                                     // commandBufferCount
598                         &resetCmdBuffer.get(),                                                          // pCommandBuffers
599                         0u,                                                                                                     // signalSemaphoreCount
600                         DE_NULL,                                                                                        // pSignalSemaphores
601                 };
602
603                 beginCommandBuffer(vkd, *resetCmdBuffer);
604                 vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
605                 endCommandBuffer(vkd, *resetCmdBuffer);
606
607                 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
608                 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
609         }
610
611         // begin command buffer
612         const VkCommandBufferBeginInfo commandBufBeginParams =
613         {
614                 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
615                 DE_NULL,
616                 VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT,
617                 (const VkCommandBufferInheritanceInfo*)DE_NULL,
618         };
619         VK_CHECK(vkd.beginCommandBuffer(*cmdBuffer, &commandBufBeginParams));
620
621         initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
622                                                                   VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
623
624         // begin render pass
625         VkClearValue renderPassClearValue;
626         deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));
627
628         // perform query during triangle draw
629         vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0, VK_QUERY_CONTROL_PRECISE_BIT);
630
631         beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer,
632                                         makeRect2D(0, 0, m_size, m_size),
633                                         1, &renderPassClearValue);
634
635         // bind pipeline
636         vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
637
638         // bind vertex buffer
639         VkBuffer vertexBuffer = m_vertexBuffer->object();
640         const VkDeviceSize vertexBufferOffset = 0;
641         vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
642
643         vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);
644
645         endRenderPass(vkd, *cmdBuffer);
646
647         vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0);
648
649         transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
650                                           VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
651                                           VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
652
653         endCommandBuffer(vkd, *cmdBuffer);
654
655         // submit command buffer for each pass and wait for its completion
656         for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
657         {
658                 const Unique<VkFence> fence(createFence(vkd, device));
659
660                 VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
661                 {
662                         VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
663                         NULL,
664                         passIndex
665                 };
666
667                 const VkSubmitInfo submitInfo =
668                 {
669                         VK_STRUCTURE_TYPE_SUBMIT_INFO,                                          // sType
670                         &performanceQuerySubmitInfo,                                            // pNext
671                         0u,                                                                                                     // waitSemaphoreCount
672                         DE_NULL,                                                                                        // pWaitSemaphores
673                         (const VkPipelineStageFlags*)DE_NULL,                           // pWaitDstStageMask
674                         1u,                                                                                                     // commandBufferCount
675                         &cmdBuffer.get(),                                                                       // pCommandBuffers
676                         0u,                                                                                                     // signalSemaphoreCount
677                         DE_NULL,                                                                                        // pSignalSemaphores
678                 };
679
680                 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
681                 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
682         }
683
684         releaseProfilingLock();
685
686         VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
687
688         if (verifyQueryResults(*queryPool))
689                 return tcu::TestStatus::pass("Pass");
690         return tcu::TestStatus::fail("Fail");
691 }
692
693 class GraphicMultiplePoolsTest : public GraphicQueryTestBase
694 {
695 public:
696                                                 GraphicMultiplePoolsTest        (vkt::Context&  context);
697         tcu::TestStatus         iterate                                         (void);
698 };
699
700 GraphicMultiplePoolsTest::GraphicMultiplePoolsTest(vkt::Context& context)
701         : GraphicQueryTestBase(context)
702 {
703 }
704
705 tcu::TestStatus GraphicMultiplePoolsTest::iterate(void)
706 {
707         if (!m_context.getPerformanceQueryFeatures().performanceCounterMultipleQueryPools)
708                 throw tcu::NotSupportedError("MultipleQueryPools not supported");
709
710         const DeviceInterface&          vkd                                     = m_context.getDeviceInterface();
711         const VkDevice                          device                          = m_context.getDevice();
712         const VkQueue                           queue                           = m_context.getUniversalQueue();
713         const CmdPoolCreateInfo         cmdPoolCreateInfo       = m_context.getUniversalQueueFamilyIndex();
714         Unique<VkCommandPool>           cmdPool                         (createCommandPool(vkd, device, &cmdPoolCreateInfo));
715         Unique<VkCommandBuffer>         cmdBuffer                       (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
716
717         initStateObjects();
718         setupCounters();
719
720         vk::Unique<VkQueryPool> queryPool1(createQueryPool(0, 2)),
721                                                         queryPool2(createQueryPool(1, 2));
722
723         if (!acquireProfilingLock())
724         {
725                 // lock was not acquired in given time, we can't fail the test
726                 return tcu::TestStatus::pass("Pass");
727         }
728
729         // reset query pools
730         {
731                 Unique<VkCommandBuffer>         resetCmdBuffer  (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
732                 const Unique<VkFence>           fence                   (createFence(vkd, device));
733                 const VkSubmitInfo                      submitInfo              =
734                 {
735                         VK_STRUCTURE_TYPE_SUBMIT_INFO,                                          // sType
736                         DE_NULL,                                                                                        // pNext
737                         0u,                                                                                                     // waitSemaphoreCount
738                         DE_NULL,                                                                                        // pWaitSemaphores
739                         (const VkPipelineStageFlags*)DE_NULL,                           // pWaitDstStageMask
740                         1u,                                                                                                     // commandBufferCount
741                         &resetCmdBuffer.get(),                                                          // pCommandBuffers
742                         0u,                                                                                                     // signalSemaphoreCount
743                         DE_NULL,                                                                                        // pSignalSemaphores
744                 };
745
746                 beginCommandBuffer(vkd, *resetCmdBuffer);
747                 vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool1, 0u, 1u);
748                 vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool2, 0u, 1u);
749                 endCommandBuffer(vkd, *resetCmdBuffer);
750
751                 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
752                 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
753         }
754
755         // begin command buffer
756         const VkCommandBufferBeginInfo commandBufBeginParams =
757         {
758                 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
759                 DE_NULL,
760                 VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT,
761                 (const VkCommandBufferInheritanceInfo*)DE_NULL,
762         };
763         VK_CHECK(vkd.beginCommandBuffer(*cmdBuffer, &commandBufBeginParams));
764
765         initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
766                                                                   VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
767
768         // begin render pass
769         VkClearValue renderPassClearValue;
770         deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));
771
772         VkBuffer                        vertexBuffer            = m_vertexBuffer->object();
773         const VkDeviceSize      vertexBufferOffset      = 0;
774         const VkQueryPool       queryPools[]            =
775         {
776                 *queryPool1,
777                 *queryPool2
778         };
779
780         // perform two queries during triangle draw
781         for (deUint32 loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
782         {
783                 const VkQueryPool queryPool = queryPools[loop];
784                 vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
785                 beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer,
786                                                 makeRect2D(0, 0, m_size, m_size),
787                                                 1, &renderPassClearValue);
788
789                 vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
790                 vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
791                 vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);
792
793                 endRenderPass(vkd, *cmdBuffer);
794                 vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
795         }
796
797         transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
798                                           VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
799                                           VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
800
801         endCommandBuffer(vkd, *cmdBuffer);
802
803         // submit command buffer for each pass and wait for its completion
804         for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
805         {
806                 const Unique<VkFence> fence(createFence(vkd, device));
807
808                 VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
809                 {
810                         VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
811                         NULL,
812                         passIndex
813                 };
814
815                 const VkSubmitInfo submitInfo =
816                 {
817                         VK_STRUCTURE_TYPE_SUBMIT_INFO,                                          // sType
818                         &performanceQuerySubmitInfo,                                            // pNext
819                         0u,                                                                                                     // waitSemaphoreCount
820                         DE_NULL,                                                                                        // pWaitSemaphores
821                         (const VkPipelineStageFlags*)DE_NULL,                           // pWaitDstStageMask
822                         1u,                                                                                                     // commandBufferCount
823                         &cmdBuffer.get(),                                                                       // pCommandBuffers
824                         0u,                                                                                                     // signalSemaphoreCount
825                         DE_NULL,                                                                                        // pSignalSemaphores
826                 };
827
828                 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
829                 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
830         }
831
832         releaseProfilingLock();
833
834         VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
835
836         if (verifyQueryResults(*queryPool1) && verifyQueryResults(*queryPool2))
837                 return tcu::TestStatus::pass("Pass");
838         return tcu::TestStatus::fail("Fail");
839 }
840
841 // Base class for all compute tests
842 class ComputeQueryTestBase : public QueryTestBase
843 {
844 public:
845         ComputeQueryTestBase(vkt::Context&      context);
846
847 protected:
848         void initStateObjects(void);
849
850 protected:
851         Move<VkPipeline>                m_pipeline;
852         Move<VkPipelineLayout>  m_pipelineLayout;
853         de::SharedPtr<Buffer>   m_buffer;
854         Move<VkDescriptorPool>  m_descriptorPool;
855         Move<VkDescriptorSet>   m_descriptorSet;
856         VkDescriptorBufferInfo  m_descriptorBufferInfo;
857         VkBufferMemoryBarrier   m_computeFinishBarrier;
858 };
859
860 ComputeQueryTestBase::ComputeQueryTestBase(vkt::Context& context)
861         : QueryTestBase(context)
862 {
863 }
864
865 void ComputeQueryTestBase::initStateObjects(void)
866 {
867         const DeviceInterface&                  vkd = m_context.getDeviceInterface();
868         const VkDevice                                  device = m_context.getDevice();
869         const VkDeviceSize                              bufferSize = 32 * sizeof(deUint32);
870         const CmdPoolCreateInfo                 cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
871         const Unique<VkCommandPool>             cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
872         const Unique<VkCommandBuffer>   cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
873
874         const Unique<VkDescriptorSetLayout> descriptorSetLayout(DescriptorSetLayoutBuilder()
875                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
876                 .build(vkd, device));
877
878         // create pipeline layout
879         {
880                 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
881                 {
882                         VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,                          // sType
883                         DE_NULL,                                                                                                        // pNext
884                         0u,                                                                                                                     // flags
885                         1u,                                                                                                                     // setLayoutCount
886                         &(*descriptorSetLayout),                                                                        // pSetLayouts
887                         0u,                                                                                                                     // pushConstantRangeCount
888                         DE_NULL,                                                                                                        // pPushConstantRanges
889                 };
890                 m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutParams);
891         }
892
893         // create compute pipeline
894         {
895                 const Unique<VkShaderModule> cs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0u));
896                 const VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
897                 {
898                         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,            // sType
899                         DE_NULL,                                                                                                        // pNext
900                         (VkPipelineShaderStageCreateFlags)0u,                                           // flags
901                         VK_SHADER_STAGE_COMPUTE_BIT,                                                            // stage
902                         *cs,                                                                                                            // module
903                         "main",                                                                                                         // pName
904                         DE_NULL,                                                                                                        // pSpecializationInfo
905                 };
906                 const VkComputePipelineCreateInfo pipelineCreateInfo =
907                 {
908                         VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,                         // sType
909                         DE_NULL,                                                                                                        // pNext
910                         (VkPipelineCreateFlags)0u,                                                                      // flags
911                         pipelineShaderStageParams,                                                                      // stage
912                         *m_pipelineLayout,                                                                                      // layout
913                         DE_NULL,                                                                                                        // basePipelineHandle
914                         0,                                                                                                                      // basePipelineIndex
915                 };
916                 m_pipeline = createComputePipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
917         }
918
919         m_buffer = Buffer::createAndAlloc(vkd, device, BufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
920                 m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);
921         m_descriptorPool = DescriptorPoolBuilder()
922                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
923                 .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
924         const VkDescriptorSetAllocateInfo allocateParams =
925         {
926                 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,         // sType
927                 DE_NULL,                                                                                        // pNext
928                 *m_descriptorPool,                                                                      // descriptorPool
929                 1u,                                                                                                     // setLayoutCount
930                 &(*descriptorSetLayout),                                                        // pSetLayouts
931         };
932
933         m_descriptorSet = allocateDescriptorSet(vkd, device, &allocateParams);
934         const VkDescriptorBufferInfo descriptorInfo =
935         {
936                 m_buffer->object(),     // buffer
937                 0ull,                           // offset
938                 bufferSize,                     // range
939         };
940
941         DescriptorSetUpdateBuilder()
942                 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
943                 .update(vkd, device);
944
945         // clear buffer
946         const std::vector<deUint8>      data((size_t)bufferSize, 0u);
947         const Allocation&                       allocation = m_buffer->getBoundMemory();
948         void*                                           allocationData = allocation.getHostPtr();
949         invalidateMappedMemoryRange(vkd, device, allocation.getMemory(), allocation.getOffset(), bufferSize);
950         deMemcpy(allocationData, &data[0], (size_t)bufferSize);
951
952         const VkBufferMemoryBarrier barrier =
953         {
954                 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,                                        // sType
955                 DE_NULL,                                                                                                        // pNext
956                 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,         // srcAccessMask
957                 VK_ACCESS_HOST_READ_BIT,                                                                        // dstAccessMask
958                 VK_QUEUE_FAMILY_IGNORED,                                                                        // srcQueueFamilyIndex
959                 VK_QUEUE_FAMILY_IGNORED,                                                                        // destQueueFamilyIndex
960                 m_buffer->object(),                                                                                     // buffer
961                 0ull,                                                                                                           // offset
962                 bufferSize,                                                                                                     // size
963         };
964         m_computeFinishBarrier = barrier;
965 }
966
967 class ComputeQueryTest : public ComputeQueryTestBase
968 {
969 public:
970                                                 ComputeQueryTest        (vkt::Context&  context);
971         tcu::TestStatus         iterate                         (void);
972 };
973
974 ComputeQueryTest::ComputeQueryTest(vkt::Context& context)
975         : ComputeQueryTestBase(context)
976 {
977 }
978
979 tcu::TestStatus ComputeQueryTest::iterate(void)
980 {
981         const DeviceInterface&                  vkd                                     = m_context.getDeviceInterface();
982         const VkDevice                                  device                          = m_context.getDevice();
983         const VkQueue                                   queue                           = m_context.getUniversalQueue();
984         const CmdPoolCreateInfo                 cmdPoolCreateInfo       (m_context.getUniversalQueueFamilyIndex());
985         const Unique<VkCommandPool>             cmdPool                         (createCommandPool(vkd, device, &cmdPoolCreateInfo));
986         const Unique<VkCommandBuffer>   resetCmdBuffer          (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
987         const Unique<VkCommandBuffer>   cmdBuffer                       (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
988
989         initStateObjects();
990         setupCounters();
991
992         vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));
993
994         if (!acquireProfilingLock())
995         {
996                 // lock was not acquired in given time, we can't fail the test
997                 return tcu::TestStatus::pass("Pass");
998         }
999
1000         beginCommandBuffer(vkd, *resetCmdBuffer);
1001         vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
1002         endCommandBuffer(vkd, *resetCmdBuffer);
1003
1004         beginCommandBuffer(vkd, *cmdBuffer);
1005         vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
1006         vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &(m_descriptorSet.get()), 0u, DE_NULL);
1007
1008         vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0u, (VkQueryControlFlags)0u);
1009         vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
1010         vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0u);
1011
1012         vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1013                 (VkDependencyFlags)0u, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &m_computeFinishBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
1014         endCommandBuffer(vkd, *cmdBuffer);
1015
1016         // submit reset of queries only once
1017         {
1018                 const VkSubmitInfo submitInfo =
1019                 {
1020                         VK_STRUCTURE_TYPE_SUBMIT_INFO,                                          // sType
1021                         DE_NULL,                                                                                        // pNext
1022                         0u,                                                                                                     // waitSemaphoreCount
1023                         DE_NULL,                                                                                        // pWaitSemaphores
1024                         (const VkPipelineStageFlags*)DE_NULL,                           // pWaitDstStageMask
1025                         1u,                                                                                                     // commandBufferCount
1026                         &resetCmdBuffer.get(),                                                          // pCommandBuffers
1027                         0u,                                                                                                     // signalSemaphoreCount
1028                         DE_NULL,                                                                                        // pSignalSemaphores
1029                 };
1030
1031                 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
1032         }
1033
1034         // submit command buffer for each pass and wait for its completion
1035         for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
1036         {
1037                 const Unique<VkFence> fence(createFence(vkd, device));
1038
1039                 VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
1040                 {
1041                         VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
1042                         NULL,
1043                         passIndex
1044                 };
1045
1046                 const VkSubmitInfo submitInfo =
1047                 {
1048                         VK_STRUCTURE_TYPE_SUBMIT_INFO,                                          // sType
1049                         &performanceQuerySubmitInfo,                                            // pNext
1050                         0u,                                                                                                     // waitSemaphoreCount
1051                         DE_NULL,                                                                                        // pWaitSemaphores
1052                         (const VkPipelineStageFlags*)DE_NULL,                           // pWaitDstStageMask
1053                         1u,                                                                                                     // commandBufferCount
1054                         &cmdBuffer.get(),                                                                       // pCommandBuffers
1055                         0u,                                                                                                     // signalSemaphoreCount
1056                         DE_NULL,                                                                                        // pSignalSemaphores
1057                 };
1058
1059                 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
1060                 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
1061         }
1062
1063         releaseProfilingLock();
1064
1065         VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
1066
1067         if (verifyQueryResults(*queryPool))
1068                 return tcu::TestStatus::pass("Pass");
1069         return tcu::TestStatus::fail("Fail");
1070 }
1071
1072 class ComputeMultiplePoolsTest : public ComputeQueryTestBase
1073 {
1074 public:
1075                                         ComputeMultiplePoolsTest        (vkt::Context&  context);
1076         tcu::TestStatus iterate                                         (void);
1077 };
1078
1079 ComputeMultiplePoolsTest::ComputeMultiplePoolsTest(vkt::Context& context)
1080         : ComputeQueryTestBase(context)
1081 {
1082 }
1083
1084 tcu::TestStatus ComputeMultiplePoolsTest::iterate(void)
1085 {
1086         if (!m_context.getPerformanceQueryFeatures().performanceCounterMultipleQueryPools)
1087                 throw tcu::NotSupportedError("MultipleQueryPools not supported");
1088
1089         const DeviceInterface&                  vkd = m_context.getDeviceInterface();
1090         const VkDevice                                  device = m_context.getDevice();
1091         const VkQueue                                   queue = m_context.getUniversalQueue();
1092         const CmdPoolCreateInfo                 cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
1093         const Unique<VkCommandPool>             cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
1094         const Unique<VkCommandBuffer>   resetCmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1095         const Unique<VkCommandBuffer>   cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1096
1097         initStateObjects();
1098         setupCounters();
1099
1100         vk::Unique<VkQueryPool> queryPool1(createQueryPool(0, 2)),
1101                                                         queryPool2(createQueryPool(1, 2));
1102
1103         if (!acquireProfilingLock())
1104         {
1105                 // lock was not acquired in given time, we can't fail the test
1106                 return tcu::TestStatus::pass("Pass");
1107         }
1108
1109         const VkQueryPool queryPools[] =
1110         {
1111                 *queryPool1,
1112                 *queryPool2
1113         };
1114
1115         beginCommandBuffer(vkd, *resetCmdBuffer);
1116         vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[0], 0u, 1u);
1117         vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[1], 0u, 1u);
1118         endCommandBuffer(vkd, *resetCmdBuffer);
1119
1120         beginCommandBuffer(vkd, *cmdBuffer);
1121         vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
1122         vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &(m_descriptorSet.get()), 0u, DE_NULL);
1123
1124         // perform two queries
1125         for (deUint32 loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
1126         {
1127                 const VkQueryPool queryPool = queryPools[loop];
1128                 vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
1129                 vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
1130                 vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
1131         }
1132
1133         vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1134                 (VkDependencyFlags)0u, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &m_computeFinishBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
1135         endCommandBuffer(vkd, *cmdBuffer);
1136
1137         // submit reset of queries only once
1138         {
1139                 const VkSubmitInfo submitInfo =
1140                 {
1141                         VK_STRUCTURE_TYPE_SUBMIT_INFO,                                          // sType
1142                         DE_NULL,                                                                                        // pNext
1143                         0u,                                                                                                     // waitSemaphoreCount
1144                         DE_NULL,                                                                                        // pWaitSemaphores
1145                         (const VkPipelineStageFlags*)DE_NULL,                           // pWaitDstStageMask
1146                         1u,                                                                                                     // commandBufferCount
1147                         &resetCmdBuffer.get(),                                                          // pCommandBuffers
1148                         0u,                                                                                                     // signalSemaphoreCount
1149                         DE_NULL,                                                                                        // pSignalSemaphores
1150                 };
1151
1152                 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
1153         }
1154
1155         // submit command buffer for each pass and wait for its completion
1156         for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
1157         {
1158                 const Unique<VkFence> fence(createFence(vkd, device));
1159
1160                 VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
1161                 {
1162                         VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
1163                         NULL,
1164                         passIndex
1165                 };
1166
1167                 const VkSubmitInfo submitInfo =
1168                 {
1169                         VK_STRUCTURE_TYPE_SUBMIT_INFO,                                          // sType
1170                         &performanceQuerySubmitInfo,                                            // pNext
1171                         0u,                                                                                                     // waitSemaphoreCount
1172                         DE_NULL,                                                                                        // pWaitSemaphores
1173                         (const VkPipelineStageFlags*)DE_NULL,                           // pWaitDstStageMask
1174                         1u,                                                                                                     // commandBufferCount
1175                         &cmdBuffer.get(),                                                                       // pCommandBuffers
1176                         0u,                                                                                                     // signalSemaphoreCount
1177                         DE_NULL,                                                                                        // pSignalSemaphores
1178                 };
1179
1180                 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
1181                 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
1182         }
1183
1184         releaseProfilingLock();
1185
1186         VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
1187
1188         if (verifyQueryResults(*queryPool1) && verifyQueryResults(*queryPool2))
1189                 return tcu::TestStatus::pass("Pass");
1190         return tcu::TestStatus::fail("Fail");
1191 }
1192
// Selects which flavour of performance query test a test case instantiates.
enum TestType
{
	TT_ENUMERATE_AND_VALIDATE	= 0,	// counter enumeration and validation, no shader programs needed
	TT_QUERY					= 1,	// query test (GraphicQueryTest / ComputeQueryTest)
	TT_MULTIPLE_POOLS			= 2		// multiple-pools test (Graphic/ComputeMultiplePoolsTest)
};
1199
1200 class QueryPoolPerformanceTest : public TestCase
1201 {
1202 public:
1203         QueryPoolPerformanceTest (tcu::TestContext &context, TestType testType, VkQueueFlagBits queueFlagBits, const char *name)
1204                 : TestCase                      (context, name, "")
1205                 , m_testType            (testType)
1206                 , m_queueFlagBits       (queueFlagBits)
1207         {
1208         }
1209
1210         vkt::TestInstance* createInstance (vkt::Context& context) const
1211         {
1212                 if (m_testType == TT_ENUMERATE_AND_VALIDATE)
1213                         return new EnumerateAndValidateTest(context, m_queueFlagBits);
1214
1215                 if (m_queueFlagBits == VK_QUEUE_GRAPHICS_BIT)
1216                 {
1217                         if (m_testType == TT_QUERY)
1218                                 return new GraphicQueryTest(context);
1219                         return new GraphicMultiplePoolsTest(context);
1220                 }
1221
1222                 // tests for VK_QUEUE_COMPUTE_BIT
1223                 if (m_testType == TT_QUERY)
1224                         return new ComputeQueryTest(context);
1225                 return new ComputeMultiplePoolsTest(context);
1226         }
1227
1228         void initPrograms (SourceCollections& programCollection) const
1229         {
1230                 // validation test do not need programs
1231                 if (m_testType == TT_ENUMERATE_AND_VALIDATE)
1232                         return;
1233
1234                 if (m_queueFlagBits == VK_QUEUE_COMPUTE_BIT)
1235                 {
1236                         programCollection.glslSources.add("comp")
1237                                 << glu::ComputeSource("#version 430\n"
1238                                                                           "layout (local_size_x = 1) in;\n"
1239                                                                           "layout(binding = 0) writeonly buffer Output {\n"
1240                                                                           "             uint values[];\n"
1241                                                                           "} sb_out;\n\n"
1242                                                                           "void main (void) {\n"
1243                                                                           "             uint index = uint(gl_GlobalInvocationID.x);\n"
1244                                                                           "             sb_out.values[index] += gl_GlobalInvocationID.y*2;\n"
1245                                                                           "}\n");
1246                         return;
1247                 }
1248
1249                 programCollection.glslSources.add("frag")
1250                         << glu::FragmentSource("#version 430\n"
1251                                                                    "layout(location = 0) out vec4 out_FragColor;\n"
1252                                                                    "void main()\n"
1253                                                                    "{\n"
1254                                                                    "    out_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
1255                                                                    "}\n");
1256
1257                 programCollection.glslSources.add("vert")
1258                         << glu::VertexSource("#version 430\n"
1259                                                                  "layout(location = 0) in vec4 in_Position;\n"
1260                                                                  "out gl_PerVertex { vec4 gl_Position; float gl_PointSize; };\n"
1261                                                                  "void main() {\n"
1262                                                                  "      gl_Position  = in_Position;\n"
1263                                                                  "      gl_PointSize = 1.0;\n"
1264                                                                  "}\n");
1265         }
1266
1267 private:
1268
1269         TestType                        m_testType;
1270         VkQueueFlagBits         m_queueFlagBits;
1271 };
1272
1273 } //anonymous
1274
1275 QueryPoolPerformanceTests::QueryPoolPerformanceTests (tcu::TestContext &testCtx)
1276         : TestCaseGroup(testCtx, "performance_query", "Tests for performance queries")
1277 {
1278 }
1279
1280 void QueryPoolPerformanceTests::init (void)
1281 {
1282         addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_GRAPHICS_BIT, "enumerate_and_validate_graphic"));
1283         addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_COMPUTE_BIT,  "enumerate_and_validate_compute"));
1284         addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_GRAPHICS_BIT, "query_graphic"));
1285         addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_COMPUTE_BIT, "query_compute"));
1286         addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_GRAPHICS_BIT, "multiple_pools_graphic"));
1287         addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_COMPUTE_BIT, "multiple_pools_compute"));
1288 }
1289
1290 } //QueryPool
1291 } //vkt