iris: Add separate dirty bit for VBO flushes.
authorFrancisco Jerez <currojerez@riseup.net>
Fri, 29 May 2020 23:11:58 +0000 (16:11 -0700)
committerMarge Bot <eric+marge@anholt.net>
Thu, 2 Sep 2021 03:14:37 +0000 (03:14 +0000)
Instead of emitting barriers every time IRIS_DIRTY_VERTEX_BUFFERS is
flagged, use a separate dirty bit and optimize out the barriers in
cases where the same buffer object is re-bound as vertex buffer.

Omitting this commit would lead to the following statistically
significant Piglit Draw Overhead regressions:

 36/DrawElements (16 VBO| 8 UBO|  8 Tex) w/ vertex attrib change:      XXX ±7.22% x22 -> XXX±11.09% x21  d=-20.10% ±8.06%  p=0.00%
 98/DrawArrays (16 VBO| 8 UBO|  8 Tex) w/ vertex attrib change:        XXX ±7.27% x22 -> XXX ±7.70% x21  d=-17.76% ±6.83%  p=0.00%
 69/DrawArrays ( 1 VBO| 8 UBO|  8 Tex) w/ vertex attrib change:        XXX ±9.94% x22 -> XXX ±8.72% x21   d=-7.46% ±9.08%  p=1.02%
 53/DrawElements (16 VBO| 8 UBO|  8 Tex) w/ depth enable change:       XXX ±8.34% x22 -> XXX ±6.88% x21   d=-7.30% ±7.45%  p=0.26%
 61/DrawElements (16 VBO| 8 UBO|  8 Tex) w/ cull face enable change:   XXX±10.22% x22 -> XXX ±8.63% x21   d=-6.75% ±9.23%  p=2.11%
 55/DrawElements (16 VBO| 8 UBO|  8 Tex) w/ stencil enable change:     XXX ±9.30% x22 -> XXX ±7.25% x21   d=-6.60% ±8.16%  p=1.14%
 50/DrawElements (16 VBO| 8 UBO|  8 Tex) w/ viewport change:           XXX ±6.48% x22 -> XXX ±5.93% x21   d=-6.58% ±6.04%  p=0.09%
 54/DrawElements (16 VBO| 8 UBO|  8 Tex) w/ depth clamp enable change: XXX ±9.95% x22 -> XXX ±7.95% x21   d=-6.50% ±8.81%  p=2.02%
 35/DrawElements (16 VBO| 8 UBO|  8 Tex) w/ shader program change:     XXX ±7.27% x22 -> XXX ±7.25% x21   d=-5.77% ±7.06%  p=1.06%

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12691>

src/gallium/drivers/iris/iris_context.h
src/gallium/drivers/iris/iris_state.c

index eb5e1ee787ed29f4b068f9a128ae248733034de1..6227320e7d824f69019c664876540f824f8628f7 100644 (file)
@@ -112,6 +112,7 @@ enum {
 #define IRIS_DIRTY_DEPTH_BOUNDS                   (1ull << 29)
 #define IRIS_DIRTY_RENDER_BUFFER                  (1ull << 30)
 #define IRIS_DIRTY_STENCIL_REF                    (1ull << 31)
+#define IRIS_DIRTY_VERTEX_BUFFER_FLUSHES          (1ull << 32)
 
 #define IRIS_ALL_DIRTY_FOR_COMPUTE (IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES)
 
index 98542447c92b51f841c7b9409eaa4d8962edaa2a..be87c60854e2f35298f54627ffe3a03fb3483b7a 100644 (file)
@@ -3441,6 +3441,10 @@ iris_set_vertex_buffers(struct pipe_context *ctx,
       /* We may see user buffers that are NULL bindings. */
       assert(!(buffer->is_user_buffer && buffer->buffer.user != NULL));
 
+      if (buffer->buffer.resource &&
+          state->resource != buffer->buffer.resource)
+         ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFER_FLUSHES;
+
       if (take_ownership) {
          pipe_resource_reference(&state->resource, NULL);
          state->resource = buffer->buffer.resource;
@@ -6400,10 +6404,6 @@ iris_upload_dirty_render_state(struct iris_context *ice,
          uint64_t bound = dynamic_bound;
          while (bound) {
             const int i = u_bit_scan64(&bound);
-
-            iris_emit_buffer_barrier_for(
-               batch, iris_resource_bo(genx->vertex_buffers[i].resource),
-               IRIS_DOMAIN_VF_READ);
             iris_use_optional_res(batch, genx->vertex_buffers[i].resource,
                                   false, IRIS_DOMAIN_VF_READ);
          }
@@ -6427,8 +6427,6 @@ iris_upload_dirty_render_state(struct iris_context *ice,
             struct iris_resource *res =
                (void *) genx->vertex_buffers[i].resource;
             if (res) {
-               iris_emit_buffer_barrier_for(batch, res->bo,
-                                            IRIS_DOMAIN_VF_READ);
                iris_use_pinned_bo(batch, res->bo, false, IRIS_DOMAIN_VF_READ);
 
                high_bits = res->bo->address >> 32ull;
@@ -6607,6 +6605,18 @@ iris_upload_dirty_render_state(struct iris_context *ice,
 #endif
 }
 
+static void
+flush_vbos(struct iris_context *ice, struct iris_batch *batch)
+{
+   struct iris_genx_state *genx = ice->state.genx;
+   uint64_t bound = ice->state.bound_vertex_buffers;
+   while (bound) {
+      const int i = u_bit_scan64(&bound);
+      struct iris_bo *bo = iris_resource_bo(genx->vertex_buffers[i].resource);
+      iris_emit_buffer_barrier_for(batch, bo, IRIS_DOMAIN_VF_READ);
+   }
+}
+
 static void
 iris_upload_render_state(struct iris_context *ice,
                          struct iris_batch *batch,
@@ -6617,6 +6627,9 @@ iris_upload_render_state(struct iris_context *ice,
 {
    bool use_predicate = ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;
 
+   if (ice->state.dirty & IRIS_DIRTY_VERTEX_BUFFER_FLUSHES)
+      flush_vbos(ice, batch);
+
    iris_batch_sync_region_start(batch);
 
    /* Always pin the binder.  If we're emitting new binding table pointers,
@@ -7247,7 +7260,8 @@ iris_rebind_buffer(struct iris_context *ice,
 
          if (*addr != bo->address + state->offset) {
             *addr = bo->address + state->offset;
-            ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS;
+            ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
+                                IRIS_DIRTY_VERTEX_BUFFER_FLUSHES;
          }
       }
    }