zink: rework query pool overflow
[platform/upstream/mesa.git] / src / gallium / drivers / zink / zink_query.c
1 #include "zink_query.h"
2
3 #include "zink_context.h"
4 #include "zink_clear.h"
5 #include "zink_program.h"
6 #include "zink_resource.h"
7 #include "zink_screen.h"
8
9 #include "util/u_dump.h"
10 #include "util/u_inlines.h"
11 #include "util/u_memory.h"
12
13 #if DETECT_ARCH_X86_64 || DETECT_ARCH_PPC_64 || DETECT_ARCH_AARCH64 || DETECT_ARCH_MIPS64
14 #define NUM_QUERIES 5000
15 #else
16 #define NUM_QUERIES 500
17 #endif
18
/* a shared VkQueryPool: many gallium queries of the same vulkan type draw
 * query ids out of one pool until it fills up (NUM_QUERIES slots)
 */
struct zink_query_pool {
   struct list_head list;                        /* link in zink_context::query_pools */
   VkQueryType vk_query_type;                    /* vulkan type shared by every query in this pool */
   VkQueryPipelineStatisticFlags pipeline_stats; /* only meaningful for PIPELINE_STATISTICS pools */
   VkQueryPool query_pool;
   unsigned last_range;                          /* most recently handed-out query index */
   unsigned refcount;                            /* one ref per zink_vk_query referencing this pool */
};
27
/* CPU-visible buffer(s) that query results are copied into via
 * vkCmdCopyQueryPoolResults; one buffer per vulkan query backing the
 * gallium query (up to one per vertex stream)
 */
struct zink_query_buffer {
   struct list_head list;     /* link in zink_query::buffers */
   unsigned num_results;      /* number of result slots already written into the buffers */
   struct pipe_resource *buffers[PIPE_MAX_VERTEX_STREAMS];
};
33
34 struct zink_vk_query {
35    struct zink_query_pool *pool;
36    unsigned query_id;
37    bool needs_reset;
38    bool started;
39    uint32_t refcount;
40 };
41
/* one begin/end interval of a gallium query; a query that is suspended and
 * resumed accumulates multiple starts, each with its own vulkan queries
 */
struct zink_query_start {
   union {
      struct {
         bool have_gs;       /* a geometry shader was bound during this start */
         bool have_xfb;      /* transform feedback was active during this start */
         bool was_line_loop; /* draw used emulated PIPE_PRIM_LINE_LOOP */
      };
      uint32_t data;         /* aliases the flags so they can be cleared in one store */
   };
   struct zink_vk_query *vkq[PIPE_MAX_VERTEX_STREAMS];
};
53
/* driver-side state for a gallium query, layered over one or more vulkan queries */
struct zink_query {
   struct threaded_query base;
   enum pipe_query_type type;

   /* Everytime the gallium query needs
    * another vulkan query, add a new start.
    */
   struct util_dynarray starts;
   unsigned start_offset; /* first start not yet copied into the qbo by update_qbo() */

   VkQueryType vkqtype;   /* vulkan query type selected by convert_query_type() */
   unsigned index;        /* stream index / statistic index, depending on type */
   bool precise;          /* occlusion counter requires VK_QUERY_CONTROL_PRECISE_BIT */

   bool active; /* query is considered active by vk */
   bool needs_reset; /* query is considered active by vk and cannot be destroyed */
   bool dead; /* query should be destroyed when its fence finishes */
   bool needs_update; /* query needs to update its qbos */
   bool needs_rast_discard_workaround; /* query needs discard disabled */
   bool suspended; /* query is on ctx->suspended_queries awaiting resume */

   struct list_head active_list;

   struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */
   bool has_draws; /* have_gs and have_xfb are valid for idx=curr_query */

   struct zink_batch_usage *batch_uses; //batch that the query was started in

   struct list_head buffers; /* list of zink_query_buffer */
   union {
      struct zink_query_buffer *curr_qbo;
      struct pipe_fence_handle *fence; //PIPE_QUERY_GPU_FINISHED
   };

   struct zink_resource *predicate;       /* gpu buffer used for conditional rendering */
   bool predicate_dirty;                  /* predicate buffer must be regenerated before use */
};
91
92 static inline int
93 get_num_starts(struct zink_query *q)
94 {
95    return util_dynarray_num_elements(&q->starts, struct zink_query_start);
96 }
97
98 static void
99 update_query_id(struct zink_context *ctx, struct zink_query *q);
100
101
102 static VkQueryPipelineStatisticFlags
103 pipeline_statistic_convert(enum pipe_statistics_query_index idx)
104 {
105    unsigned map[] = {
106       [PIPE_STAT_QUERY_IA_VERTICES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT,
107       [PIPE_STAT_QUERY_IA_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT,
108       [PIPE_STAT_QUERY_VS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT,
109       [PIPE_STAT_QUERY_GS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT,
110       [PIPE_STAT_QUERY_GS_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT,
111       [PIPE_STAT_QUERY_C_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT,
112       [PIPE_STAT_QUERY_C_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT,
113       [PIPE_STAT_QUERY_PS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
114       [PIPE_STAT_QUERY_HS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT,
115       [PIPE_STAT_QUERY_DS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT,
116       [PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT
117    };
118    assert(idx < ARRAY_SIZE(map));
119    return map[idx];
120 }
121
122 static void
123 begin_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index,
124                        VkQueryControlFlags flags)
125 {
126    struct zink_batch *batch = &ctx->batch;
127    if (!vkq->started) {
128       VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf,
129                                      vkq->pool->query_pool,
130                                      vkq->query_id,
131                                      flags,
132                                      index);
133       vkq->started = true;
134    }
135 }
136
137 static void
138 end_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index)
139 {
140    struct zink_batch *batch = &ctx->batch;
141    if (vkq->started) {
142       VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf,
143                                    vkq->pool->query_pool,
144                                    vkq->query_id, index);
145       vkq->started = false;
146    }
147 }
148
149 static void
150 reset_vk_query_pool(struct zink_context *ctx, struct zink_vk_query *vkq)
151 {
152    struct zink_batch *batch = &ctx->batch;
153    if (vkq->needs_reset) {
154       VKCTX(CmdResetQueryPool)(batch->state->barrier_cmdbuf, vkq->pool->query_pool, vkq->query_id, 1);
155       batch->state->has_barriers = true;
156    }
157    vkq->needs_reset = false;
158 }
159
/* context teardown: destroy every pool still on ctx->query_pools.
 * NOTE(review): pools are freed regardless of refcount here — presumably all
 * queries are already destroyed at context teardown; confirm against callers.
 */
void
zink_context_destroy_query_pools(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   list_for_each_entry_safe(struct zink_query_pool, pool, &ctx->query_pools, list) {
      VKSCR(DestroyQueryPool)(screen->dev, pool->query_pool, NULL);
      list_del(&pool->list);
      FREE(pool);
   }
}
170
/* find a live pool on ctx->query_pools compatible with this query (matching
 * vulkan type and, for pipeline statistics, matching statistic flags), or
 * create and register a new one.
 * @idx selects the sub-query for emulated queries (idx==1 of an emulated
 *      primitives-generated query is the xfb pool)
 * Returns NULL on allocation/vulkan failure.
 */
static struct zink_query_pool *
find_or_allocate_qp(struct zink_context *ctx, struct zink_query *q, unsigned idx)
{
   VkQueryPipelineStatisticFlags pipeline_stats = 0;
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      pipeline_stats = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
                       VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT;
   else if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE)
      pipeline_stats = pipeline_statistic_convert(q->index);

   VkQueryType vk_query_type = q->vkqtype;
   /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && idx == 1) {
      vk_query_type = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
      pipeline_stats = 0;
   }

   struct zink_screen *screen = zink_screen(ctx->base.screen);
   /* reuse any existing compatible pool; statistics pools must also match flags */
   list_for_each_entry(struct zink_query_pool, pool, &ctx->query_pools, list) {
      if (pool->vk_query_type == vk_query_type) {
         if (vk_query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
            if (pool->pipeline_stats == pipeline_stats)
               return pool;
         } else
            return pool;
      }
   }

   struct zink_query_pool *new_pool = CALLOC_STRUCT(zink_query_pool);
   if (!new_pool)
      return NULL;

   new_pool->vk_query_type = vk_query_type;
   new_pool->pipeline_stats = pipeline_stats;

   VkQueryPoolCreateInfo pool_create = {0};
   pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
   pool_create.queryType = vk_query_type;
   pool_create.queryCount = NUM_QUERIES;
   pool_create.pipelineStatistics = pipeline_stats;

   VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &new_pool->query_pool);
   if (status != VK_SUCCESS) {
      mesa_loge("ZINK: vkCreateQueryPool failed (%s)", vk_Result_to_str(status));
      FREE(new_pool);
      return NULL;
   }

   list_addtail(&new_pool->list, &ctx->query_pools);
   return new_pool;
}
222
223 static void
224 update_qbo(struct zink_context *ctx, struct zink_query *q);
225 static void
226 reset_qbos(struct zink_context *ctx, struct zink_query *q);
227
228
229 static bool
230 is_emulated_primgen(const struct zink_query *q)
231 {
232    return q->type == PIPE_QUERY_PRIMITIVES_GENERATED &&
233           q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT;
234 }
235
/* emulated primgen needs two pools (statistics + xfb); everything else needs one */
static inline unsigned
get_num_query_pools(struct zink_query *q)
{
   return is_emulated_primgen(q) ? 2 : 1;
}
243
244 static inline unsigned
245 get_num_queries(struct zink_query *q)
246 {
247    if (is_emulated_primgen(q))
248       return 2;
249    if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
250       return PIPE_MAX_VERTEX_STREAMS;
251    return 1;
252 }
253
254 static inline unsigned
255 get_num_results(struct zink_query *q)
256 {
257    if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
258       return 1;
259    switch (q->type) {
260    case PIPE_QUERY_OCCLUSION_COUNTER:
261    case PIPE_QUERY_OCCLUSION_PREDICATE:
262    case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
263    case PIPE_QUERY_TIME_ELAPSED:
264    case PIPE_QUERY_TIMESTAMP:
265    case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
266       return 1;
267    case PIPE_QUERY_PRIMITIVES_GENERATED:
268    case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
269    case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
270    case PIPE_QUERY_PRIMITIVES_EMITTED:
271       return 2;
272    default:
273       debug_printf("unknown query: %s\n",
274                    util_str_query_type(q->type, true));
275       unreachable("zink: unknown query type");
276    }
277 }
278
/* convert a raw GPU timestamp in-place to nanoseconds: mask off invalid high
 * bits, then scale by the device's tick period
 */
static void
timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp)
{
   /* The number of valid bits in a timestamp value is determined by
    * the VkQueueFamilyProperties::timestampValidBits property of the queue on which the timestamp is written.
    * - 17.5. Timestamp Queries
    */
   if (screen->timestamp_valid_bits < 64)
      *timestamp &= (1ull << screen->timestamp_valid_bits) - 1;

   /* The number of nanoseconds it takes for a timestamp value to be incremented by 1
    * can be obtained from VkPhysicalDeviceLimits::timestampPeriod
    * - 17.5. Timestamp Queries
    */
   /* period is a float; the double multiply truncates back to integer ns */
   *timestamp *= (double)screen->info.props.limits.timestampPeriod;
}
295
/* map a gallium query type to the vulkan query type used to implement it.
 * @precise set to true only for OCCLUSION_COUNTER (needs exact sample counts).
 * NOTE(review): zink_create_query compares the result against -1, but the
 * default branch here hits unreachable() and never returns -1 — verify intent.
 */
static VkQueryType
convert_query_type(struct zink_screen *screen, enum pipe_query_type query_type, bool *precise)
{
   *precise = false;
   switch (query_type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      *precise = true;
      FALLTHROUGH;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      return VK_QUERY_TYPE_OCCLUSION;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
      return VK_QUERY_TYPE_TIMESTAMP;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      /* prefer the native extension; otherwise emulate via pipeline statistics */
      return screen->info.have_EXT_primitives_generated_query ?
             VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT :
             VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(query_type, true));
      unreachable("zink: unknown query type");
   }
}
326
327 static bool
328 needs_stats_list(struct zink_query *query)
329 {
330    return is_emulated_primgen(query) ||
331           query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
332           query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
333 }
334
335 static bool
336 is_time_query(struct zink_query *query)
337 {
338    return query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED;
339 }
340
341 static bool
342 is_so_overflow_query(struct zink_query *query)
343 {
344    return query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
345 }
346
347 static bool
348 is_bool_query(struct zink_query *query)
349 {
350    return is_so_overflow_query(query) ||
351           query->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
352           query->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
353           query->type == PIPE_QUERY_GPU_FINISHED;
354 }
355
356 static bool
357 qbo_append(struct pipe_screen *screen, struct zink_query *query)
358 {
359    if (query->curr_qbo && query->curr_qbo->list.next)
360       return true;
361    struct zink_query_buffer *qbo = CALLOC_STRUCT(zink_query_buffer);
362    if (!qbo)
363       return false;
364    int num_buffers = get_num_queries(query);
365
366    for (unsigned i = 0; i < num_buffers; i++) {
367       qbo->buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
368                                            PIPE_USAGE_STAGING,
369                                            /* this is the maximum possible size of the results in a given buffer */
370                                            NUM_QUERIES * get_num_results(query) * sizeof(uint64_t));
371       if (!qbo->buffers[i])
372          goto fail;
373    }
374    list_addtail(&qbo->list, &query->buffers);
375
376    return true;
377 fail:
378    for (unsigned i = 0; i < num_buffers; i++)
379       pipe_resource_reference(&qbo->buffers[i], NULL);
380    FREE(qbo);
381    return false;
382 }
383
/* drop one reference to a pool; destroys and unlinks it when the count hits 0.
 * NULL is a no-op.
 */
static void
unref_vk_pool(struct zink_screen *screen, struct zink_query_pool *pool)
{
   /* pre-decrement: returns while other references remain */
   if (!pool || --pool->refcount)
      return;
   VKSCR(DestroyQueryPool)(screen->dev, pool->query_pool, NULL);
   /* the pool may already have been retired from ctx->query_pools */
   if (list_is_linked(&pool->list))
      list_del(&pool->list);
   FREE(pool);
}
394
395 static void
396 unref_vk_query(struct zink_screen *screen, struct zink_vk_query *vkq)
397 {
398    if (!vkq)
399       return;
400    unref_vk_pool(screen, vkq->pool);
401    vkq->refcount--;
402    if (vkq->refcount == 0)
403       FREE(vkq);
404 }
405
/* release everything owned by a query: its vulkan queries, qbos, and
 * predicate buffer. must only run once no batch is using the query.
 */
static void
destroy_query(struct zink_screen *screen, struct zink_query *query)
{
   assert(zink_screen_usage_check_completion(screen, query->batch_uses));
   struct zink_query_buffer *qbo, *next;

   /* walk the full dynarray *capacity*, not just the valid size:
    * query_pool_get_range zeroes newly-grown capacity, so unused slots hold
    * NULL vkq pointers and unref_vk_query() skips them safely
    */
   struct zink_query_start *starts = query->starts.data;
   unsigned num_starts = query->starts.capacity / sizeof(struct zink_query_start);
   for (unsigned j = 0; j < num_starts; j++) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         unref_vk_query(screen, starts[j].vkq[i]);
      }
   }

   util_dynarray_fini(&query->starts);
   LIST_FOR_EACH_ENTRY_SAFE(qbo, next, &query->buffers, list) {
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->buffers); i++)
         pipe_resource_reference(&qbo->buffers[i], NULL);
      FREE(qbo);
   }
   pipe_resource_reference((struct pipe_resource**)&query->predicate, NULL);
   FREE(query);
}
429
/* rewind the query to its first qbo and mark it empty */
static void
reset_qbo(struct zink_query *q)
{
   q->curr_qbo = list_first_entry(&q->buffers, struct zink_query_buffer, list);
   q->curr_qbo->num_results = 0;
}
436
437 static void
438 query_pool_get_range(struct zink_context *ctx, struct zink_query *q)
439 {
440    bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
441    struct zink_query_start *start;
442    int num_queries = get_num_queries(q);
443    if (!is_timestamp || get_num_starts(q) == 0) {
444       size_t size = q->starts.capacity;
445       start = util_dynarray_grow(&q->starts, struct zink_query_start, 1);
446       if (size != q->starts.capacity) {
447          /* when resizing, always zero the new data to avoid garbage */
448          uint8_t *data = q->starts.data;
449          memset(data + size, 0, q->starts.capacity - size);
450       }
451    } else {
452       start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
453    }
454    start->data = 0;
455
456    unsigned num_pools = get_num_query_pools(q);
457    for (unsigned i = 0; i < num_queries; i++) {
458       int pool_idx = num_pools > 1 ? i : 0;
459       /* try and find the active query for this */
460       struct zink_vk_query *vkq;
461       int xfb_idx = num_queries == 4 ? i : q->index;
462       if ((q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
463            (pool_idx == 1)) && ctx->curr_xfb_queries[xfb_idx]) {
464          vkq = ctx->curr_xfb_queries[xfb_idx];
465          vkq->refcount++;
466          vkq->pool->refcount++;
467       } else {
468          struct zink_query_pool *pool = find_or_allocate_qp(ctx, q, pool_idx);
469          pool->refcount++;
470          pool->last_range++;
471          if (pool->last_range == NUM_QUERIES) {
472             list_del(&pool->list);
473             pool = find_or_allocate_qp(ctx, q, pool_idx);
474          }
475          vkq = CALLOC_STRUCT(zink_vk_query);
476
477          vkq->refcount = 1;
478          vkq->needs_reset = true;
479          vkq->pool = pool;
480          vkq->started = false;
481          vkq->query_id = pool->last_range;
482
483       }
484       unref_vk_query(zink_screen(ctx->base.screen), start->vkq[i]);
485       start->vkq[i] = vkq;
486    }
487 }
488
489 static struct pipe_query *
490 zink_create_query(struct pipe_context *pctx,
491                   unsigned query_type, unsigned index)
492 {
493    struct zink_screen *screen = zink_screen(pctx->screen);
494    struct zink_query *query = CALLOC_STRUCT(zink_query);
495
496    if (!query)
497       return NULL;
498    list_inithead(&query->buffers);
499
500    query->index = index;
501    query->type = query_type;
502    if (query->type == PIPE_QUERY_GPU_FINISHED || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT)
503       return (struct pipe_query *)query;
504    query->vkqtype = convert_query_type(screen, query_type, &query->precise);
505    if (query->vkqtype == -1)
506       return NULL;
507
508    util_dynarray_init(&query->starts, NULL);
509
510    assert(!query->precise || query->vkqtype == VK_QUERY_TYPE_OCCLUSION);
511
512    /* use emulated path for drivers without full support */
513    if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && index &&
514        !screen->info.primgen_feats.primitivesGeneratedQueryWithNonZeroStreams)
515       query->vkqtype = VK_QUERY_TYPE_PIPELINE_STATISTICS;
516
517    if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
518       query->needs_rast_discard_workaround = !screen->info.primgen_feats.primitivesGeneratedQueryWithRasterizerDiscard;
519    } else if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) {
520       query->needs_rast_discard_workaround = true;
521    }
522
523    if (!qbo_append(pctx->screen, query))
524       goto fail;
525    struct zink_batch *batch = &zink_context(pctx)->batch;
526    batch->has_work = true;
527    query->needs_reset = true;
528    if (query->type == PIPE_QUERY_TIMESTAMP) {
529       query->active = true;
530       /* defer pool reset until end_query since we're guaranteed to be threadsafe then */
531       reset_qbo(query);
532    }
533    return (struct pipe_query *)query;
534 fail:
535    destroy_query(screen, query);
536    return NULL;
537 }
538
/* pipe_context::destroy_query implementation */
static void
zink_destroy_query(struct pipe_context *pctx,
                   struct pipe_query *q)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)q;

   /* only destroy if this query isn't active on any batches,
    * otherwise just mark dead and wait
    */
   if (query->batch_uses) {
      /* zink_prune_query() will finish destruction when the batch retires */
      p_atomic_set(&query->dead, true);
      return;
   }

   destroy_query(screen, query);
}
556
557 void
558 zink_prune_query(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_query *query)
559 {
560    if (!zink_batch_usage_matches(query->batch_uses, bs))
561       return;
562    query->batch_uses = NULL;
563    if (p_atomic_read(&query->dead))
564       destroy_query(screen, query);
565 }
566
/* accumulate raw vulkan results into the gallium result union, one entry per
 * recorded start, interpreting the data according to the query type.
 * @results     mapped qbo data, get_num_results(query) u64s per start
 * @xfb_results mapped xfb qbo data for emulated primgen (may be NULL otherwise)
 * NOTE(review): @num_starts is unused — iteration is driven by query->starts.
 */
static void
check_query_results(struct zink_query *query, union pipe_query_result *result,
                    int num_starts, uint64_t *results, uint64_t *xfb_results)
{
   uint64_t last_val = 0;
   int result_size = get_num_results(query);
   int idx = 0;
   util_dynarray_foreach(&query->starts, struct zink_query_start, start) {
      /* i indexes the first u64 of this start's result group */
      unsigned i = idx * result_size;
      idx++;
      switch (query->type) {
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      case PIPE_QUERY_GPU_FINISHED:
         result->b |= results[i] != 0;
         break;

      case PIPE_QUERY_TIME_ELAPSED:
      case PIPE_QUERY_TIMESTAMP:
         /* the application can sum the differences between all N queries to determine the total execution time.
          * - 17.5. Timestamp Queries
          */
         if (query->type != PIPE_QUERY_TIME_ELAPSED || i)
            result->u64 += results[i] - last_val;
         last_val = results[i];
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
         result->u64 += results[i];
         break;
      case PIPE_QUERY_PRIMITIVES_GENERATED:
         if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
            result->u64 += results[i];
         else if (start->have_xfb || query->index)
            /* xfb was active: use primitivesNeeded from the xfb query */
            result->u64 += xfb_results[i + 1];
         else
            /* if a given draw had a geometry shader, we need to use the first result */
            result->u64 += results[i + !start->have_gs];
         break;
      case PIPE_QUERY_PRIMITIVES_EMITTED:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         result->u64 += results[i];
         break;
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         /* overflow occurred iff written != needed */
         if (start->have_xfb)
            result->b |= results[i] != results[i + 1];
         break;
      case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
         switch (query->index) {
         case PIPE_STAT_QUERY_IA_VERTICES:
            /* emulated line loops double-count vertices; halve the result */
            result->u64 += start->was_line_loop ? results[i] / 2 : results[i];
            break;
         default:
            result->u64 += results[i];
            break;
         }
         break;

      default:
         debug_printf("unhandled query type: %s\n",
                      util_str_query_type(query->type, true));
         unreachable("unexpected query type");
      }
   }
}
641
/* read back and accumulate all of a query's buffered results on the CPU.
 * maps each qbo (optionally non-blocking when @wait is false), folds the raw
 * values through check_query_results(), and converts timestamps to ns.
 * Returns false if a non-blocking map would have blocked or mapping failed.
 */
static bool
get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)q;
   unsigned flags = PIPE_MAP_READ;

   if (!wait)
      flags |= PIPE_MAP_DONTBLOCK;
   if (query->base.flushed)
      /* this is not a context-safe operation; ensure map doesn't use slab alloc */
      flags |= PIPE_MAP_THREAD_SAFE;

   util_query_clear_result(result, query->type);

   int num_starts = get_num_starts(query);
   /* no results: return zero */
   if (!num_starts)
      return true;
   int result_size = get_num_results(query) * sizeof(uint64_t);
   int num_maps = get_num_queries(query);

   struct zink_query_buffer *qbo;
   struct pipe_transfer *xfer[PIPE_MAX_VERTEX_STREAMS] = { 0 };
   LIST_FOR_EACH_ENTRY(qbo, &query->buffers, list) {
      uint64_t *results[PIPE_MAX_VERTEX_STREAMS] = { NULL, NULL };
      bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP;
      if (!qbo->num_results)
         continue;

      /* map one buffer per backing vulkan query */
      for (unsigned i = 0; i < num_maps; i++) {
         results[i] = pipe_buffer_map_range(pctx, qbo->buffers[i], 0,
                                            (is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer[i]);
         if (!results[i]) {
            if (wait)
               debug_printf("zink: qbo read failed!");
            goto fail;
         }
      }
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
         /* check each stream until one reports overflow */
         for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS && !result->b; i++) {
            check_query_results(query, result, num_starts, results[i], NULL);
         }
      } else
         /* results[1] is the xfb buffer for emulated primgen, NULL otherwise */
         check_query_results(query, result, num_starts, results[0], results[1]);

      for (unsigned i = 0 ; i < num_maps; i++)
         pipe_buffer_unmap(pctx, xfer[i]);

      /* if overflow is detected we can stop */
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE && result->b)
         break;
   }

   if (is_time_query(query))
      timestamp_to_nanoseconds(screen, &result->u64);

   return true;
fail:
   /* unmap whatever was successfully mapped before the failure */
   for (unsigned i = 0 ; i < num_maps; i++)
      if (xfer[i])
         pipe_buffer_unmap(pctx, xfer[i]);
   return false;
}
709
710 static void
711 force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_query_value_type result_type, struct pipe_resource *pres, unsigned offset)
712 {
713    struct pipe_context *pctx = &ctx->base;
714    unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
715    struct zink_query *query = (struct zink_query*)pquery;
716    union pipe_query_result result;
717
718    if (query->needs_update)
719       update_qbo(ctx, query);
720
721    bool success = get_query_result(pctx, pquery, true, &result);
722    if (!success) {
723       debug_printf("zink: getting query result failed\n");
724       return;
725    }
726
727    if (result_type <= PIPE_QUERY_TYPE_U32) {
728       uint32_t u32;
729       uint32_t limit;
730       if (result_type == PIPE_QUERY_TYPE_I32)
731          limit = INT_MAX;
732       else
733          limit = UINT_MAX;
734       if (is_bool_query(query))
735          u32 = result.b;
736       else
737          u32 = MIN2(limit, result.u64);
738       tc_buffer_write(pctx, pres, offset, result_size, &u32);
739    } else {
740       uint64_t u64;
741       if (is_bool_query(query))
742          u64 = result.b;
743       else
744          u64 = result.u64;
745       tc_buffer_write(pctx, pres, offset, result_size, &u64);
746    }
747 }
748
/* record a vkCmdCopyQueryPoolResults of @num_results consecutive results
 * starting at @query_id into @res at @offset, with the required barriers and
 * batch bookkeeping. result stride is get_num_results(query) values of the
 * width selected by VK_QUERY_RESULT_64_BIT in @flags.
 */
static void
copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query, VkQueryPool pool,
                            unsigned query_id, struct zink_resource *res, unsigned offset,
                            int num_results, VkQueryResultFlags flags)
{
   struct zink_batch *batch = &ctx->batch;
   unsigned type_size = (flags & VK_QUERY_RESULT_64_BIT) ? sizeof(uint64_t) : sizeof(uint32_t);
   unsigned base_result_size = get_num_results(query) * type_size;
   unsigned result_size = base_result_size * num_results;
   /* availability adds one extra value after each result group */
   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      result_size += type_size;

   bool marker = zink_cmd_debug_marker_begin(ctx, "update_qbo(%s: id=%u, num_results=%d)", vk_QueryType_to_str(query->vkqtype), query_id, num_results);

   /* copies cannot be recorded inside a renderpass */
   zink_batch_no_rp(ctx);
   /* if it's a single query that doesn't need special handling, we can copy it and be done */
   zink_batch_reference_resource_rw(batch, res, true);
   zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_TRANSFER_WRITE_BIT, 0);
   util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size);
   assert(query_id < NUM_QUERIES);
   res->obj->unordered_read = res->obj->unordered_write = false;
   VKCTX(CmdCopyQueryPoolResults)(batch->state->cmdbuf, pool, query_id, num_results, res->obj->buffer,
                                  offset, base_result_size, flags);
   zink_cmd_debug_marker_end(ctx, marker);
}
774
775 static void
776 copy_results_to_buffer(struct zink_context *ctx, struct zink_query *query, struct zink_resource *res, unsigned offset, int num_results, VkQueryResultFlags flags)
777 {
778    struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
779    copy_pool_results_to_buffer(ctx, query, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, res, offset, num_results, flags);
780 }
781
782
783 static void
784 reset_query_range(struct zink_context *ctx, struct zink_query *q)
785 {
786    int num_queries = get_num_queries(q);
787    struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
788    for (unsigned i = 0; i < num_queries; i++) {
789       reset_vk_query_pool(ctx, start->vkq[i]);
790    }
791 }
792
793 static void
794 reset_qbos(struct zink_context *ctx, struct zink_query *q)
795 {
796    if (q->needs_update)
797       update_qbo(ctx, q);
798
799    q->needs_reset = false;
800    /* create new qbo for non-timestamp queries:
801     * timestamp queries should never need more than 2 entries in the qbo
802     */
803    if (q->type == PIPE_QUERY_TIMESTAMP)
804       return;
805    if (qbo_append(ctx->base.screen, q))
806       reset_qbo(q);
807    else
808       debug_printf("zink: qbo alloc failed on reset!");
809 }
810
811 static inline unsigned
812 get_buffer_offset(struct zink_query *q)
813 {
814    return (get_num_starts(q) - 1) * get_num_results(q) * sizeof(uint64_t);
815 }
816
/* record GPU copies of all not-yet-copied starts (from q->start_offset on)
 * into the current qbo, one copy per backing vulkan query per start
 */
static void
update_qbo(struct zink_context *ctx, struct zink_query *q)
{
   struct zink_query_buffer *qbo = q->curr_qbo;
   unsigned num_starts = get_num_starts(q);
   struct zink_query_start *starts = q->starts.data;
   bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
   /* timestamp queries just write to offset 0 always */
   int num_queries = get_num_queries(q);
   for (unsigned j = q->start_offset; j < num_starts; j++) {
      unsigned cur_offset = q->curr_qbo->num_results * get_num_results(q) * sizeof(uint64_t);
      for (unsigned i = 0; i < num_queries; i++) {
         unsigned offset = is_timestamp ? 0 : cur_offset;
         copy_pool_results_to_buffer(ctx, q, starts[j].vkq[i]->pool->query_pool, starts[j].vkq[i]->query_id,
                                    zink_resource(qbo->buffers[i]),
                                    offset,
                                    1,
                                    /*
                                       there is an implicit execution dependency from
                                       each such query command to all query commands previously submitted to the same queue. There
                                       is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
                                       include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
                                       the results of vkCmdEndQuery are available.

                                    * - Chapter 18. Queries
                                    */
                                    VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
      }
      /* advance result slot and mark this start as copied */
      if (!is_timestamp) {
         q->curr_qbo->num_results++;
         q->start_offset++;
      }
   }


   if (is_timestamp)
      q->curr_qbo->num_results = 1;

   q->needs_update = false;
}
857
/* record the vk commands that begin @q on @batch: grabs a fresh pool range,
 * then issues the appropriate vkCmdBeginQuery(Indexed)/vkCmdWriteTimestamp
 * variant(s) for the query type and registers the query as active on the batch
 */
static void
begin_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
   VkQueryControlFlags flags = 0;

   /* TIMESTAMP_DISJOINT has no vk counterpart; answered from device props */
   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT)
      return;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_CS_INVOCATIONS && ctx->batch.in_rp) {
      /* refuse to start CS queries in renderpasses */
      if (!list_is_linked(&q->active_list))
         list_addtail(&q->active_list, &ctx->suspended_queries);
      q->suspended = true;
      return;
   }

   /* allocate a new start (vk query ids) for this begin */
   update_query_id(ctx, q);
   q->predicate_dirty = true;
   if (q->needs_reset)
      reset_qbos(ctx, q);
   reset_query_range(ctx, q);
   q->active = true;
   batch->has_work = true;

   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   if (q->type == PIPE_QUERY_TIME_ELAPSED) {
      /* elapsed time = (end timestamp) - (this top-of-pipe start timestamp) */
      VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
      if (!batch->in_rp)
         update_qbo(ctx, q);
      zink_batch_usage_set(&q->batch_uses, batch->state);
      _mesa_set_add(&batch->state->active_queries, q);
   }
   /* ignore the rest of begin_query for timestamps */
   if (is_time_query(q))
      return;

   if (q->precise)
      flags |= VK_QUERY_CONTROL_PRECISE_BIT;

   /* xfb-related queries use the indexed (per-stream) begin */
   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* vkq[1] is the xfb query when both a primgen and an xfb query exist */
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];
      assert(!ctx->curr_xfb_queries[q->index] || ctx->curr_xfb_queries[q->index] == vkq);
      ctx->curr_xfb_queries[q->index] = vkq;

      begin_vk_query_indexed(ctx, vkq, q->index, flags);
   } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* "any" overflow needs one query per vertex stream */
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         assert(!ctx->curr_xfb_queries[i] || ctx->curr_xfb_queries[i] == start->vkq[i]);
         ctx->curr_xfb_queries[i] = start->vkq[i];

         begin_vk_query_indexed(ctx, start->vkq[i], i, flags);
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      begin_vk_query_indexed(ctx, start->vkq[0], q->index, flags);
   }
   /* everything else uses the plain non-indexed begin */
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      VKCTX(CmdBeginQuery)(batch->state->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, flags);
   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_IA_VERTICES)  {
      /* tracked so draws can restart the query when line-loop emulation changes */
      assert(!ctx->vertices_query);
      ctx->vertices_query = q;
   }
   if (needs_stats_list(q))
      list_addtail(&q->stats_list, &ctx->primitives_generated_queries);
   zink_batch_usage_set(&q->batch_uses, batch->state);
   _mesa_set_add(&batch->state->active_queries, q);
   if (q->needs_rast_discard_workaround) {
      /* keep rasterization running (with writes masked) so primgen counts */
      ctx->primitives_generated_active = true;
      if (zink_set_rasterizer_discard(ctx, true))
         zink_set_color_write_enables(ctx);
   }
}
931
932 static bool
933 zink_begin_query(struct pipe_context *pctx,
934                  struct pipe_query *q)
935 {
936    struct zink_query *query = (struct zink_query *)q;
937    struct zink_context *ctx = zink_context(pctx);
938    struct zink_batch *batch = &ctx->batch;
939
940    /* drop all past results */
941    reset_qbo(query);
942
943    query->predicate_dirty = true;
944
945    util_dynarray_clear(&query->starts);
946    query->start_offset = 0;
947
948    /* A query must either begin and end inside the same subpass of a render pass
949       instance, or must both begin and end outside of a render pass instance
950       (i.e. contain entire render pass instances).
951       - 18.2. Query Operation
952
953     * tilers prefer out-of-renderpass queries for perf reasons, so force all queries
954     * out of renderpasses
955     */
956    zink_batch_no_rp(ctx);
957    begin_query(ctx, batch, query);
958
959    return true;
960 }
961
962 static void
963 update_query_id(struct zink_context *ctx, struct zink_query *q)
964 {
965    query_pool_get_range(ctx, q);
966    ctx->batch.has_work = true;
967    q->has_draws = false;
968 }
969
/* record the vk commands that end @q on @batch, mirroring begin_query's
 * type-specific begin variants; flags the query as needing a qbo update.
 * timestamp queries must not come through here (they are ended inline in
 * zink_end_query)
 */
static void
end_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT)
      return;

   ASSERTED struct zink_query_buffer *qbo = q->curr_qbo;
   assert(qbo);
   assert(!is_time_query(q));
   q->active = false;
   /* end the same vk queries that the most recent start began */
   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* vkq[1] is the xfb query when both a primgen and an xfb query exist */
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];

      end_vk_query_indexed(ctx, vkq, q->index);
      ctx->curr_xfb_queries[q->index] = NULL;
   }
   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* one per-stream query was begun for each vertex stream */
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         end_vk_query_indexed(ctx, start->vkq[i], i);
         ctx->curr_xfb_queries[i] = NULL;
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      end_vk_query_indexed(ctx, start->vkq[0], q->index);
   }
   /* everything else was begun with the plain non-indexed begin */
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT &&
       q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && !is_time_query(q))
      VKCTX(CmdEndQuery)(batch->state->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_IA_VERTICES)
      ctx->vertices_query = NULL;

   if (needs_stats_list(q))
      list_delinit(&q->stats_list);

   /* results now exist in the pool; copy to qbo on the next update */
   q->needs_update = true;
   if (q->needs_rast_discard_workaround) {
      /* undo the discard override applied in begin_query */
      ctx->primitives_generated_active = false;
      if (zink_set_rasterizer_discard(ctx, false))
         zink_set_color_write_enables(ctx);
   }
}
1016
/* pipe_context::end_query hook: finish the query, handling the types that
 * never go through begin_query/end_query (disjoint, gpu-finished, timestamps)
 */
static bool
zink_end_query(struct pipe_context *pctx,
               struct pipe_query *q)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)q;
   struct zink_batch *batch = &ctx->batch;

   /* no vk object backs this type; nothing to end */
   if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT)
      return true;

   /* GPU_FINISHED is implemented with a deferred fence, not a vk query */
   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      pctx->flush(pctx, &query->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   /* FIXME: this can be called from a thread, but it needs to write to the cmdbuf */
   threaded_context_unwrap_sync(pctx);
   zink_batch_no_rp(ctx);

   if (needs_stats_list(query))
      list_delinit(&query->stats_list);
   if (query->suspended) {
      /* query was parked on the suspended list (e.g. CS query begun in a rp) */
      list_delinit(&query->active_list);
      query->suspended = false;
   }
   if (is_time_query(query)) {
      /* timestamps have no begin; record the write here at bottom-of-pipe */
      update_query_id(ctx, query);
      if (query->needs_reset)
         reset_qbos(ctx, query);
      reset_query_range(ctx, query);
      struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                               start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
      zink_batch_usage_set(&query->batch_uses, batch->state);
      _mesa_set_add(&batch->state->active_queries, query);
      query->needs_update = true;
   } else if (query->active)
      end_query(ctx, batch, query);

   return true;
}
1059
1060 static bool
1061 zink_get_query_result(struct pipe_context *pctx,
1062                       struct pipe_query *q,
1063                       bool wait,
1064                       union pipe_query_result *result)
1065 {
1066    struct zink_query *query = (void*)q;
1067    struct zink_context *ctx = zink_context(pctx);
1068
1069    if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT) {
1070       result->timestamp_disjoint.frequency = zink_screen(pctx->screen)->info.props.limits.timestampPeriod * 1000000.0;
1071       result->timestamp_disjoint.disjoint = false;
1072       return true;
1073    }
1074
1075    if (query->type == PIPE_QUERY_GPU_FINISHED) {
1076       struct pipe_screen *screen = pctx->screen;
1077
1078       result->b = screen->fence_finish(screen, query->base.flushed ? NULL : pctx,
1079                                         query->fence, wait ? PIPE_TIMEOUT_INFINITE : 0);
1080       return result->b;
1081    }
1082
1083    if (query->needs_update)
1084       update_qbo(ctx, query);
1085
1086    if (zink_batch_usage_is_unflushed(query->batch_uses)) {
1087       if (!threaded_query(q)->flushed)
1088          pctx->flush(pctx, NULL, 0);
1089       if (!wait)
1090          return false;
1091    }
1092
1093    return get_query_result(pctx, q, wait, result);
1094 }
1095
1096 static void
1097 suspend_query(struct zink_context *ctx, struct zink_query *query)
1098 {
1099    /* if a query isn't active here then we don't need to reactivate it on the next batch */
1100    if (query->active && !is_time_query(query))
1101       end_query(ctx, &ctx->batch, query);
1102    if (query->needs_update && !ctx->batch.in_rp)
1103       update_qbo(ctx, query);
1104 }
1105
1106 void
1107 zink_suspend_queries(struct zink_context *ctx, struct zink_batch *batch)
1108 {
1109    set_foreach(&batch->state->active_queries, entry) {
1110       struct zink_query *query = (void*)entry->key;
1111       if (query->suspended)
1112          continue;
1113       if (query->active && !is_time_query(query)) {
1114          /* the fence is going to steal the set off the batch, so we have to copy
1115           * the active queries onto a list
1116           */
1117          list_addtail(&query->active_list, &ctx->suspended_queries);
1118          query->suspended = true;
1119       }
1120       suspend_query(ctx, query);
1121    }
1122 }
1123
1124 void
1125 zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch)
1126 {
1127    struct zink_query *query, *next;
1128    LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
1129       list_delinit(&query->active_list);
1130       query->suspended = false;
1131       if (query->needs_update && !ctx->batch.in_rp)
1132          update_qbo(ctx, query);
1133       begin_query(ctx, batch, query);
1134    }
1135 }
1136
1137 void
1138 zink_resume_cs_query(struct zink_context *ctx)
1139 {
1140    struct zink_query *query, *next;
1141    LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
1142       if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_CS_INVOCATIONS) {
1143          list_delinit(&query->active_list);
1144          query->suspended = false;
1145          begin_query(ctx, &ctx->batch, query);
1146       }
1147    }
1148 }
1149
1150 void
1151 zink_query_update_gs_states(struct zink_context *ctx, bool was_line_loop)
1152 {
1153    struct zink_query *query;
1154    bool suspendall = false;
1155    bool have_gs = !!ctx->gfx_stages[MESA_SHADER_GEOMETRY];
1156    bool have_xfb = !!ctx->num_so_targets;
1157
1158    LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
1159       struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
1160       assert(query->active);
1161       if (query->has_draws) {
1162          if (last_start->have_gs != have_gs ||
1163              last_start->have_xfb != have_xfb) {
1164             suspendall = true;
1165          }
1166       }
1167    }
1168
1169    if (ctx->vertices_query) {
1170       query = ctx->vertices_query;
1171       struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
1172       assert(query->active);
1173       if (last_start->was_line_loop != was_line_loop) {
1174          suspendall = true;
1175       }
1176    }
1177    if (suspendall) {
1178      zink_suspend_queries(ctx, &ctx->batch);
1179      zink_resume_queries(ctx, &ctx->batch);
1180    }
1181
1182    LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
1183       struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
1184       last_start->have_gs = have_gs;
1185       last_start->have_xfb = have_xfb;
1186       query->has_draws = true;
1187    }
1188    if (ctx->vertices_query) {
1189       query = ctx->vertices_query;
1190       struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
1191       last_start->was_line_loop = was_line_loop;
1192       query->has_draws = true;
1193    }
1194 }
1195
1196 static void
1197 zink_set_active_query_state(struct pipe_context *pctx, bool enable)
1198 {
1199    struct zink_context *ctx = zink_context(pctx);
1200    ctx->queries_disabled = !enable;
1201
1202    struct zink_batch *batch = &ctx->batch;
1203    if (ctx->queries_disabled)
1204       zink_suspend_queries(ctx, batch);
1205    else
1206       zink_resume_queries(ctx, batch);
1207 }
1208
1209 void
1210 zink_start_conditional_render(struct zink_context *ctx)
1211 {
1212    if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || ctx->render_condition.active)
1213       return;
1214    struct zink_batch *batch = &ctx->batch;
1215    VkConditionalRenderingFlagsEXT begin_flags = 0;
1216    if (ctx->render_condition.inverted)
1217       begin_flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
1218    VkConditionalRenderingBeginInfoEXT begin_info = {0};
1219    begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
1220    begin_info.buffer = ctx->render_condition.query->predicate->obj->buffer;
1221    begin_info.flags = begin_flags;
1222    ctx->render_condition.query->predicate->obj->unordered_read = false;
1223    VKCTX(CmdBeginConditionalRenderingEXT)(batch->state->cmdbuf, &begin_info);
1224    zink_batch_reference_resource_rw(batch, ctx->render_condition.query->predicate, false);
1225    ctx->render_condition.active = true;
1226 }
1227
1228 void
1229 zink_stop_conditional_render(struct zink_context *ctx)
1230 {
1231    struct zink_batch *batch = &ctx->batch;
1232    zink_clear_apply_conditionals(ctx);
1233    if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || !ctx->render_condition.active)
1234       return;
1235    VKCTX(CmdEndConditionalRenderingEXT)(batch->state->cmdbuf);
1236    ctx->render_condition.active = false;
1237 }
1238
/* pipe_context::render_condition hook: set up (or, when @pquery is NULL, tear
 * down) conditional rendering driven by a query result; the result is copied
 * into a per-query predicate buffer that vk conditional rendering reads
 */
static void
zink_render_condition(struct pipe_context *pctx,
                      struct pipe_query *pquery,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)pquery;
   zink_batch_no_rp(ctx);
   VkQueryResultFlagBits flags = 0;

   if (query == NULL) {
      /* force conditional clears if they exist */
      if (ctx->clears_enabled && !ctx->batch.in_rp)
         zink_batch_rp(ctx);
      zink_stop_conditional_render(ctx);
      ctx->render_condition_active = false;
      ctx->render_condition.query = NULL;
      return;
   }

   if (!query->predicate) {
      struct pipe_resource *pres;

      /* need to create a vulkan buffer to copy the data into */
      pres = pipe_buffer_create(pctx->screen, PIPE_BIND_QUERY_BUFFER, PIPE_USAGE_DEFAULT, sizeof(uint64_t));
      if (!pres)
         return;

      query->predicate = zink_resource(pres);
   }
   if (query->predicate_dirty) {
      struct zink_resource *res = query->predicate;

      if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT)
         flags |= VK_QUERY_RESULT_WAIT_BIT;

      flags |= VK_QUERY_RESULT_64_BIT;
      int num_results = get_num_starts(query);
      if (num_results) {
         if (!is_emulated_primgen(query) &&
            !is_so_overflow_query(query)) {
            /* direct gpu copy from the query pool(s) into the predicate */
            copy_results_to_buffer(ctx, query, res, 0, num_results, flags);
         } else {
            /* these need special handling */
            force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0);
         }
      } else {
         /* query never started: the predicate is a constant zero */
         uint64_t zero = 0;
         tc_buffer_write(pctx, &res->base.b, 0, sizeof(zero), &zero);
      }
      /* make the predicate write visible to the conditional-rendering stage */
      zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT);
      query->predicate_dirty = false;
   }
   ctx->render_condition.inverted = condition;
   ctx->render_condition_active = true;
   ctx->render_condition.query = query;
   /* otherwise deferred until the next renderpass actually begins */
   if (ctx->batch.in_rp)
      zink_start_conditional_render(ctx);
}
1299
/* pipe_context::get_query_result_resource hook: write a query's result (or,
 * for index == -1, its availability) into @pres at @offset with the width
 * given by @result_type
 */
static void
zink_get_query_result_resource(struct pipe_context *pctx,
                               struct pipe_query *pquery,
                               enum pipe_query_flags flags,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *pres,
                               unsigned offset)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query*)pquery;
   struct zink_resource *res = zink_resource(pres);
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   VkQueryResultFlagBits size_flags = result_type <= PIPE_QUERY_TYPE_U32 ? 0 : VK_QUERY_RESULT_64_BIT;
   unsigned num_queries = get_num_starts(query);

   /* it's possible that a query may have no data at all: write out zeroes to the buffer and return */
   uint64_t u64[4] = {0};
   /* availability is written immediately after the result values, i.e. at
    * this offset within a single result slot
    */
   unsigned src_offset = result_size * get_num_results(query);
   if (!num_queries) {
      tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
      return;
   }

   if (index == -1) {
      /* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT will ALWAYS write some kind of result data
       * in addition to the availability result, which is a problem if we're just trying to get availability data
       *
       * if we know that there's no valid buffer data in the preceding buffer range, then we can just
       * stomp on it with a glorious queued buffer copy instead of forcing a stall to manually write to the
       * buffer
       */

      VkQueryResultFlags flag = is_time_query(query) ? 0 : VK_QUERY_RESULT_PARTIAL_BIT;
      if (zink_batch_usage_check_completion(ctx, query->batch_uses)) {
         /* the batch using the query is done: read availability on the cpu
          * without stalling
          */
         struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
         unsigned query_id = start->vkq[0]->query_id;
         VkResult result = VKCTX(GetQueryPoolResults)(screen->dev, start->vkq[0]->pool->query_pool, query_id, 1,
                                   sizeof(u64), u64, 0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
         if (result == VK_SUCCESS) {
            tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
            return;
         } else {
            mesa_loge("ZINK: vkGetQueryPoolResults failed (%s)", vk_Result_to_str(result));
         }
      }
      /* gpu path: copy result+availability into a staging buffer, then copy
       * only the trailing availability word into the destination
       */
      struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size);
      copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
      zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size);
      pipe_resource_reference(&staging, NULL);
      return;
   }

   /*
      there is an implicit execution dependency from
      each such query command to all query commands previously submitted to the same queue. There
      is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
      include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
      the results of vkCmdEndQuery are available.

    * - Chapter 18. Queries
    */
   size_flags |= VK_QUERY_RESULT_WAIT_BIT;
   if (!is_time_query(query) && !is_bool_query(query)) {
      /* single-start plain counter queries can be serviced by one gpu copy */
      if (num_queries == 1 && !is_emulated_primgen(query) &&
                              query->type != PIPE_QUERY_PRIMITIVES_EMITTED &&
                              !is_bool_query(query)) {
         if (size_flags == VK_QUERY_RESULT_64_BIT) {
            if (query->needs_update)
               update_qbo(ctx, query);
            /* internal qbo always writes 64bit value so we can just direct copy */
            zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffers[0]), offset,
                             get_buffer_offset(query),
                             result_size);
         } else
            /* have to do a new copy for 32bit */
            copy_results_to_buffer(ctx, query, res, offset, 1, size_flags);
         return;
      }
   }

   /* TODO: use CS to aggregate results */

   /* unfortunately, there's no way to accumulate results from multiple queries on the gpu without either
    * clobbering all but the last result or writing the results sequentially, so we have to manually write the result
    */
   force_cpu_read(ctx, pquery, result_type, pres, offset);
}
1389
1390 uint64_t
1391 zink_get_timestamp(struct pipe_screen *pscreen)
1392 {
1393    struct zink_screen *screen = zink_screen(pscreen);
1394    uint64_t timestamp, deviation;
1395    if (screen->info.have_EXT_calibrated_timestamps) {
1396       VkCalibratedTimestampInfoEXT cti = {0};
1397       cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
1398       cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
1399       VkResult result = VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, &timestamp, &deviation);
1400       if (result != VK_SUCCESS) {
1401          mesa_loge("ZINK: vkGetCalibratedTimestampsEXT failed (%s)", vk_Result_to_str(result));
1402       }
1403    } else {
1404       zink_screen_lock_context(screen);
1405       struct pipe_context *pctx = &screen->copy_context->base;
1406       struct pipe_query *pquery = pctx->create_query(pctx, PIPE_QUERY_TIMESTAMP, 0);
1407       if (!pquery)
1408          return 0;
1409       union pipe_query_result result = {0};
1410       pctx->begin_query(pctx, pquery);
1411       pctx->end_query(pctx, pquery);
1412       pctx->get_query_result(pctx, pquery, true, &result);
1413       pctx->destroy_query(pctx, pquery);
1414       zink_screen_unlock_context(screen);
1415       timestamp = result.u64;
1416    }
1417    timestamp_to_nanoseconds(screen, &timestamp);
1418    return timestamp;
1419 }
1420
1421 void
1422 zink_context_query_init(struct pipe_context *pctx)
1423 {
1424    struct zink_context *ctx = zink_context(pctx);
1425    list_inithead(&ctx->suspended_queries);
1426    list_inithead(&ctx->primitives_generated_queries);
1427
1428    pctx->create_query = zink_create_query;
1429    pctx->destroy_query = zink_destroy_query;
1430    pctx->begin_query = zink_begin_query;
1431    pctx->end_query = zink_end_query;
1432    pctx->get_query_result = zink_get_query_result;
1433    pctx->get_query_result_resource = zink_get_query_result_resource;
1434    pctx->set_active_query_state = zink_set_active_query_state;
1435    pctx->render_condition = zink_render_condition;
1436 }