asahi: Implement occlusion queries

author Alyssa Rosenzweig <alyssa@rosenzweig.io>

Wed, 23 Nov 2022 19:32:35 +0000 (14:32 -0500)

committer Alyssa Rosenzweig <alyssa@rosenzweig.io>

Sun, 11 Dec 2022 02:55:30 +0000 (21:55 -0500)
author Alyssa Rosenzweig <alyssa@rosenzweig.io>
Wed, 23 Nov 2022 19:32:35 +0000 (14:32 -0500)
committer Alyssa Rosenzweig <alyssa@rosenzweig.io>
Sun, 11 Dec 2022 02:55:30 +0000 (21:55 -0500)
diff --git a/src/gallium/drivers/asahi/agx_batch.c b/src/gallium/drivers/asahi/agx_batch.c

index cd25c08..70df3e6 100644 (file)
--- a/src/gallium/drivers/asahi/agx_batch.c
+++ b/src/gallium/drivers/asahi/agx_batch.c
@@ -51,6 +51,7 @@ agx_batch_init(struct agx_context *ctx,
  
     util_dynarray_init(&batch->scissor, ctx);
     util_dynarray_init(&batch->depth_bias, ctx);
+   util_dynarray_init(&batch->occlusion_queries, ctx);
  
     batch->clear = 0;
     batch->draw = 0;
@@ -85,6 +86,10 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch)
     if (ctx->batch == batch)
        ctx->batch = NULL;
  
+   agx_finish_batch_occlusion_queries(batch);
+   batch->occlusion_buffer.cpu = NULL;
+   batch->occlusion_buffer.gpu = 0;
+
     /* There is no more writer for anything we wrote recorded on this context */
     hash_table_foreach(ctx->writer, ent) {
        if (ent->data == batch)
@@ -102,6 +107,7 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch)
  
     util_dynarray_fini(&batch->scissor);
     util_dynarray_fini(&batch->depth_bias);
+   util_dynarray_fini(&batch->occlusion_queries);
     util_unreference_framebuffer_state(&batch->key);
  
     unsigned batch_idx = agx_batch_idx(batch);
@@ -281,3 +287,25 @@ agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc)
     assert(!_mesa_hash_table_search(ctx->writer, rsrc));
     _mesa_hash_table_insert(ctx->writer, rsrc, batch);
  }
+
+/*
+ * Query availability has to be monotonic. Quoting the OpenGL specification:
+ *
+ *    It must always be true that if any query object returns a result
+ *    available of TRUE, all queries of the same type issued prior to that
+ *    query must also return TRUE.
+ *
+ * We get that ordering by flushing every batch that has occlusion queries
+ * recorded against it.
+ */
+void
+agx_flush_occlusion_queries(struct agx_context *ctx)
+{
+   unsigned slot;
+
+   foreach_batch(ctx, slot) {
+      struct agx_batch *candidate = &ctx->batches.slots[slot];
+      bool tracks_queries = (candidate->occlusion_queries.size != 0);
+
+      if (tracks_queries)
+         agx_flush_batch_for_reason(ctx, candidate, "Occlusion query ordering");
+   }
+}
diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c

index c98e08b..68a839b 100644 (file)
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@@ -1012,6 +1012,18 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
      */
     agx_batch_add_bo(batch, batch->encoder);
  
+   /* Occlusion queries are allocated as a contiguous pool */
+   unsigned oq_count = util_dynarray_num_elements(&batch->occlusion_queries,
+                                                  struct agx_query *);
+   size_t oq_size = oq_count * sizeof(uint64_t);
+
+   if (oq_size) {
+      batch->occlusion_buffer = agx_pool_alloc_aligned(&batch->pool, oq_size, 64);
+      memset(batch->occlusion_buffer.cpu, 0, oq_size);
+   } else {
+      batch->occlusion_buffer.gpu = 0;
+   }
+
     unsigned handle_count =
        agx_batch_num_bo(batch) +
        agx_pool_num_bos(&batch->pool) +
@@ -1044,6 +1056,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
                 encoder_id,
                 scissor,
                 zbias,
+               batch->occlusion_buffer.gpu,
                 pipeline_background,
                 pipeline_background_partial,
                 pipeline_store,
diff --git a/src/gallium/drivers/asahi/agx_query.c b/src/gallium/drivers/asahi/agx_query.c

index b913782..8312201 100644 (file)
--- a/src/gallium/drivers/asahi/agx_query.c
+++ b/src/gallium/drivers/asahi/agx_query.c
@@ -10,6 +10,9 @@ agx_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
  {
     struct agx_query *query = calloc(1, sizeof(struct agx_query));
  
+   query->type = query_type;
+   query->index = index;
+
     return (struct pipe_query *)query;
  }
  
@@ -20,32 +23,157 @@ agx_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
  }
  
  static bool
-agx_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+agx_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
  {
-   return true;
+   /* Begin a query. Only the occlusion query types are handled: the query
+    * becomes the context's current occlusion query and its accumulated value
+    * is reset to zero. Any other query type is rejected by returning false.
+    */
+   struct agx_context *ctx = agx_context(pctx);
+   struct agx_query *query = (struct agx_query *) pquery;
+
+   switch (query->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+      /* Dirty the query state so the next state emission sees the new query */
+      ctx->occlusion_query = query;
+      ctx->dirty |= AGX_DIRTY_QUERY;
+
+      /* begin_query zeroes, flush so we can do that write. If anything (i.e.
+       * other than piglit) actually hits this, we could shadow the query to
+       * avoid the flush.
+       */
+      if (query->writer)
+         agx_flush_batch_for_reason(ctx, query->writer, "Occlusion overwritten");
+
+      /* The flush is expected to have detached the query from its writer;
+       * agx_finish_batch_occlusion_queries is what clears the field.
+       */
+      assert(query->writer == NULL);
+
+      query->value = 0;
+      return true;
+
+   default:
+      return false;
+   }
  }
  
  static bool
-agx_end_query(struct pipe_context *ctx, struct pipe_query *query)
+agx_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
  {
-   return true;
+   /* End a query. For the occlusion query types this clears the context's
+    * current occlusion query and dirties the query state so that it is
+    * re-emitted; the result itself is gathered later, when the writing batch
+    * is cleaned up. Any other query type is rejected by returning false.
+    */
+   struct agx_context *ctx = agx_context(pctx);
+   struct agx_query *query = (struct agx_query *) pquery;
+
+   switch (query->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+      ctx->occlusion_query = NULL;
+      ctx->dirty |= AGX_DIRTY_QUERY;
+      return true;
+
+   default:
+      return false;
+   }
  }
  
  static bool
-agx_get_query_result(struct pipe_context *ctx,
-                     struct pipe_query *query,
+agx_get_query_result(struct pipe_context *pctx,
+                     struct pipe_query *pquery,
                       bool wait,
                       union pipe_query_result *vresult)
  {
-   uint64_t *result = (uint64_t*)vresult;
+   /* Report the value accumulated for an occlusion query into vresult and
+    * return true. If a batch is still recorded as the query's writer, every
+    * batch tracking occlusion queries is flushed first. The `wait` argument
+    * is not consulted yet (see the TODO below). Other query types are not
+    * supported and hit unreachable().
+    */
+   struct agx_query *query = (struct agx_query *) pquery;
+   struct agx_context *ctx = agx_context(pctx);
   
-   *result = 0;
-   return true;
+   switch (query->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+      if (query->writer != NULL) {
+         assert(query->writer->occlusion_queries.size != 0);
+
+         /* Querying the result forces a query to finish in finite time, so we
+          * need to flush regardless. Furthermore, we need all earlier queries
+          * to finish before this query, so we flush all batches writing queries
+          * now. Yes, this sucks for tilers.
+          */
+         agx_flush_occlusion_queries(ctx);
+
+         /* TODO: Respect wait when we have real sync */
+      }
+
+      assert(query->writer == NULL && "cleared when cleaning up batch");
+
+      /* Counter queries report through u64, the predicate types through b */
+      if (query->type == PIPE_QUERY_OCCLUSION_COUNTER)
+         vresult->u64 = query->value;
+      else
+         vresult->b = query->value;
+
+      return true;
+
+   default:
+      unreachable("Other queries not yet supported");
+   }
  }
  
  static void
  agx_set_active_query_state(struct pipe_context *pipe, bool enable)
  {
+   /* Record whether queries are currently active. The occlusion state
+    * emitted in agx_encode_state is only programmed for a query while
+    * active_queries is set, so dirty the query state to have it re-emitted.
+    */
+   struct agx_context *ctx = agx_context(pipe);
+
+   ctx->active_queries = enable;
+   ctx->dirty |= AGX_DIRTY_QUERY;
+}
+
+/*
+ * Return the slot that `query` occupies in `batch`'s list of occlusion
+ * queries, registering the query with the batch on first use. Slots are
+ * handed out in append order, so the slot equals the query's position in
+ * batch->occlusion_queries.
+ *
+ * If a different batch is currently recorded as the query's writer, that
+ * batch is flushed first so that the query has a single writer at a time.
+ */
+uint16_t
+agx_get_oq_index(struct agx_batch *batch, struct agx_query *query)
+{
+   /* If written by another batch, flush it now. If this affects real apps, we
+    * could avoid this flush by merging query results.
+    */
+   if (query->writer && query->writer != batch) {
+      agx_flush_batch_for_reason(batch->ctx, query->writer,
+                                 "Multiple occlusion query writers");
+   }
+
+   /* Allocate if needed */
+   if (query->writer == NULL) {
+      query->writer = batch;
+      query->writer_index = util_dynarray_num_elements(&batch->occlusion_queries,
+                                                       struct agx_query *);
+
+      util_dynarray_append(&batch->occlusion_queries, struct agx_query *, query);
+   }
+
+   assert(query->writer == batch);
+   assert(*util_dynarray_element(&batch->occlusion_queries, struct agx_query *,
+                                 query->writer_index) == query);
+
+   /* writer_index is an unsigned but the slot is returned as a uint16_t, so
+    * catch an out-of-range slot instead of silently truncating it.
+    */
+   assert(query->writer_index <= UINT16_MAX);
+
+   return query->writer_index;
+}
+
+void
+agx_finish_batch_occlusion_queries(struct agx_batch *batch)
+{
+   /* CPU mapping of the batch's results, one uint64_t per tracked query in
+    * the same order as batch->occlusion_queries. NULL means no draws actually
+    * enabled any occlusion queries, in which case no value changes.
+    */
+   const uint64_t *readback = (const uint64_t *) batch->occlusion_buffer.cpu;
+   unsigned slot = 0;
+
+   util_dynarray_foreach(&batch->occlusion_queries, struct agx_query *, it) {
+      struct agx_query *query = *it;
+      assert(query->writer == batch);
+
+      if (readback != NULL) {
+         uint64_t count = readback[slot];
+         bool is_counter = (query->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+         /* Fold into what earlier batches already contributed, since one
+          * API-level query may span several batches: counters add up,
+          * predicates latch once anything passed.
+          */
+         if (is_counter)
+            query->value += count;
+         else
+            query->value |= (count != 0);
+      }
+
+      slot++;
+
+      /* Detach the query from this batch */
+      query->writer = NULL;
+      query->writer_index = 0;
+   }
  }
  
  void
@@ -57,4 +185,7 @@ agx_init_query_functions(struct pipe_context *pctx)
     pctx->end_query = agx_end_query;
     pctx->get_query_result = agx_get_query_result;
     pctx->set_active_query_state = agx_set_active_query_state;
+
+   /* By default queries are active */
+   agx_context(pctx)->active_queries = true;
  }
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c

index adcfaaa..40e3554 100644 (file)
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -1698,7 +1698,6 @@ agx_batch_init_state(struct agx_batch *batch)
        .w_clamp = true,
        .varying_word_1 = true,
        .cull_2 = true,
-      .occlusion_query = true,
        .occlusion_query_2 = true,
        .output_unknown = true,
        .varying_word_2 = true,
@@ -1707,7 +1706,6 @@ agx_batch_init_state(struct agx_batch *batch)
     agx_ppp_push(&ppp, W_CLAMP, cfg) cfg.w_clamp = 1e-10;
     agx_ppp_push(&ppp, VARYING_1, cfg);
     agx_ppp_push(&ppp, CULL_2, cfg);
-   agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY, cfg);
     agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY_2, cfg);
     agx_ppp_push(&ppp, OUTPUT_UNKNOWN, cfg);
     agx_ppp_push(&ppp, VARYING_2, cfg);
@@ -1833,7 +1831,7 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out,
                              (is_points && IS_DIRTY(SPRITE_COORD_MODE));
  
     bool fragment_control_dirty = IS_DIRTY(ZS) || IS_DIRTY(RS) ||
-                                 IS_DIRTY(PRIM);
+                                 IS_DIRTY(PRIM) || IS_DIRTY(QUERY);
  
     bool fragment_face_dirty = IS_DIRTY(ZS) || IS_DIRTY(STENCIL_REF) ||
                                IS_DIRTY(RS);
@@ -1857,11 +1855,19 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out,
        .varying_word_0 = IS_DIRTY(VS_PROG),
        .cull = IS_DIRTY(RS),
        .fragment_shader = IS_DIRTY(FS) || varyings_dirty,
+      .occlusion_query = IS_DIRTY(QUERY),
        .output_size = IS_DIRTY(VS_PROG),
     });
  
     if (fragment_control_dirty) {
        agx_ppp_push(&ppp, FRAGMENT_CONTROL, cfg) {
+         if (ctx->active_queries && ctx->occlusion_query) {
+            if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER)
+               cfg.visibility_mode = AGX_VISIBILITY_MODE_COUNTING;
+            else
+               cfg.visibility_mode = AGX_VISIBILITY_MODE_BOOLEAN;
+         }
+
           cfg.stencil_test_enable = ctx->zs->base.stencil[0].enabled;
           cfg.two_sided_stencil = ctx->zs->base.stencil[1].enabled;
           cfg.depth_bias_enable = rast->base.offset_tri;
@@ -1954,6 +1960,16 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out,
        }
     }
  
+   if (IS_DIRTY(QUERY)) {
+      agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY, cfg) {
+         if (ctx->active_queries && ctx->occlusion_query) {
+            cfg.index = agx_get_oq_index(batch, ctx->occlusion_query);
+         } else {
+            cfg.index = 0;
+         }
+      }
+   }
+
     if (IS_DIRTY(VS_PROG)) {
        agx_ppp_push(&ppp, OUTPUT_SIZE, cfg)
           cfg.count = vs->info.varyings.vs.nr_index;
diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h

index 3495f9c..7866895 100644 (file)
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -127,6 +127,12 @@ struct agx_batch {
  
     /* Scissor and depth-bias descriptors, uploaded at GPU time */
     struct util_dynarray scissor, depth_bias;
+
+   /* Indexed occlusion queries within the occlusion buffer, and the occlusion
+    * buffer itself which is allocated at submit time.
+    */
+   struct util_dynarray occlusion_queries;
+   struct agx_ptr occlusion_buffer;
  };
  
  struct agx_zsa {
@@ -186,6 +192,7 @@ enum agx_dirty {
     AGX_DIRTY_FS_PROG    = BITFIELD_BIT(11),
  
     AGX_DIRTY_BLEND      = BITFIELD_BIT(12),
+   AGX_DIRTY_QUERY      = BITFIELD_BIT(13),
  };
  
  #define AGX_MAX_BATCHES (2)
@@ -228,6 +235,9 @@ struct agx_context {
     bool cond_cond;
     enum pipe_render_cond_flag cond_mode;
  
+   struct agx_query *occlusion_query;
+   bool active_queries;
+
     struct util_debug_callback debug;
     bool is_noop;
  
@@ -264,7 +274,19 @@ struct agx_rasterizer {
  };
  
  struct agx_query {
-   unsigned    query;
+   /* Query type and index, as passed to agx_create_query */
+   unsigned type;
+   unsigned index;
+
+   /* Invariant for occlusion queries:
+    *
+    *    writer != NULL => writer->occlusion_queries[writer_index] == this, and
+    *    writer == NULL => no batch such that this in batch->occlusion_queries
+    */
+   struct agx_batch *writer;
+   unsigned writer_index;
+
+   /* For occlusion queries: the result accumulated on the CPU as writer
+    * batches finish (summed for counters, OR'd for the predicate types).
+    */
+   uint64_t value;
  };
  
  struct agx_sampler_state {
@@ -438,6 +460,8 @@ void agx_flush_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch
  void agx_flush_all(struct agx_context *ctx, const char *reason);
  void agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
  void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
+/* Flush every batch that has occlusion queries recorded against it */
+void agx_flush_occlusion_queries(struct agx_context *ctx);
  
  /* Use these instead of batch_add_bo for proper resource tracking */
  void agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc);
@@ -464,4 +488,11 @@ agx_batch_init_state(struct agx_batch *batch);
  uint64_t
  agx_build_meta(struct agx_batch *batch, bool store, bool partial_render);
  
+/* Query management */
+uint16_t
+agx_get_oq_index(struct agx_batch *batch, struct agx_query *query);
+
+void
+agx_finish_batch_occlusion_queries(struct agx_batch *batch);
+
  #endif
diff --git a/src/gallium/drivers/asahi/magic.c b/src/gallium/drivers/asahi/magic.c

index 1f8d9ec..9064d52 100644 (file)
--- a/src/gallium/drivers/asahi/magic.c
+++ b/src/gallium/drivers/asahi/magic.c
@@ -156,6 +156,7 @@ demo_cmdbuf(uint64_t *buf, size_t size,
              uint64_t encoder_id,
              uint64_t scissor_ptr,
              uint64_t depth_bias_ptr,
+            uint64_t occlusion_ptr,
              uint32_t pipeline_clear,
              uint32_t pipeline_load,
              uint32_t pipeline_store,
@@ -194,6 +195,7 @@ demo_cmdbuf(uint64_t *buf, size_t size,
        cfg.store_pipeline = pipeline_store;
        cfg.scissor_array = scissor_ptr;
        cfg.depth_bias_array = depth_bias_ptr;
+      cfg.visibility_result_buffer = occlusion_ptr;
  
        if (framebuffer->zsbuf) {
           struct pipe_surface *zsbuf = framebuffer->zsbuf;
diff --git a/src/gallium/drivers/asahi/magic.h b/src/gallium/drivers/asahi/magic.h

index 0231afd..a1e98f2 100644 (file)
--- a/src/gallium/drivers/asahi/magic.h
+++ b/src/gallium/drivers/asahi/magic.h
@@ -32,6 +32,7 @@ demo_cmdbuf(uint64_t *buf, size_t size,
              uint64_t encoder_id,
              uint64_t scissor_ptr,
              uint64_t depth_bias_ptr,
+            uint64_t occlusion_ptr,
              uint32_t pipeline_clear,
              uint32_t pipeline_load,
              uint32_t pipeline_store,
author	Alyssa Rosenzweig <alyssa@rosenzweig.io>
	Wed, 23 Nov 2022 19:32:35 +0000 (14:32 -0500)
committer	Alyssa Rosenzweig <alyssa@rosenzweig.io>
	Sun, 11 Dec 2022 02:55:30 +0000 (21:55 -0500)
src/gallium/drivers/asahi/agx_batch.c		patch \| blob \| history
src/gallium/drivers/asahi/agx_pipe.c		patch \| blob \| history
src/gallium/drivers/asahi/agx_query.c		patch \| blob \| history
src/gallium/drivers/asahi/agx_state.c		patch \| blob \| history
src/gallium/drivers/asahi/agx_state.h		patch \| blob \| history
src/gallium/drivers/asahi/magic.c		patch \| blob \| history
src/gallium/drivers/asahi/magic.h		patch \| blob \| history