iris: Track dirty UBOs per-stage for more targeted flushing.
author: Francisco Jerez <currojerez@riseup.net>
Tue, 5 May 2020 20:05:52 +0000 (13:05 -0700)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 2 Sep 2021 03:14:37 +0000 (03:14 +0000)
This allows us to skip over individual constant buffer bindings that
don't require flushing: those which haven't been changed since the last
flush, and those which are set to a user buffer.

Omitting this commit would lead to the following statistically
significant Piglit Draw Overhead regressions:

 107/DrawArrays (16 VBO| 8 UBO|  8 Tex) w/ 1 UBO change:   XXX ±2.31% x22 -> XXX ±2.55% x21  d=-3.49% ±2.38%  p=0.00%
 79/DrawArrays ( 1 VBO| 8 UBO|  8 Tex) w/ 8 UBOs change:   XXX ±1.90% x22 -> XXX ±2.25% x21  d=-3.20% ±2.04%  p=0.00%
 78/DrawArrays ( 1 VBO| 8 UBO|  8 Tex) w/ 1 UBO change:    XXX ±2.64% x22 -> XXX ±2.58% x21  d=-2.74% ±2.58%  p=0.12%
 45/DrawElements (16 VBO| 8 UBO|  8 Tex) w/ 1 UBO change:  XXX ±2.53% x22 -> XXX ±2.29% x21  d=-2.41% ±2.39%  p=0.20%
 108/DrawArrays (16 VBO| 8 UBO|  8 Tex) w/ 8 UBOs change:  XXX ±2.10% x22 -> XXX ±1.41% x21  d=-2.36% ±1.78%  p=0.01%
 16/DrawElements ( 1 VBO| 8 UBO|  8 Tex) w/ 1 UBO change:  XXX ±2.44% x22 -> XXX ±1.19% x21  d=-2.12% ±1.93%  p=0.09%
 46/DrawElements (16 VBO| 8 UBO|  8 Tex) w/ 8 UBOs change: XXX ±2.93% x22 -> XXX ±2.44% x21  d=-1.99% ±2.68%  p=1.93%

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12691>

src/gallium/drivers/iris/iris_context.h
src/gallium/drivers/iris/iris_resolve.c
src/gallium/drivers/iris/iris_state.c

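diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index 87d8bcc..4418ad0 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h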
@@ -541,6 +541,7 @@ struct iris_shader_state {
 
    /** Bitfield of which constant buffers are bound (non-null). */
    uint32_t bound_cbufs;
+   uint32_t dirty_cbufs;
 
    /** Bitfield of which image views are bound (non-null). */
    uint32_t bound_image_views;
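diff --git a/src/gallium/drivers/iris/iris_resolve.c b/src/gallium/drivers/iris/iris_resolve.c
index 09acaf5..361e5aa 100644
--- a/src/gallium/drivers/iris/iris_resolve.c
+++ b/src/gallium/drivers/iris/iris_resolve.c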
@@ -375,7 +375,7 @@ static void
 flush_ubos(struct iris_batch *batch,
             struct iris_shader_state *shs)
 {
-   uint32_t cbufs = shs->bound_cbufs;
+   uint32_t cbufs = shs->dirty_cbufs & shs->bound_cbufs;
 
    while (cbufs) {
       const int i = u_bit_scan(&cbufs);
@@ -383,6 +383,8 @@ flush_ubos(struct iris_batch *batch,
       struct iris_resource *res = (void *)cbuf->buffer;
       iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_OTHER_READ);
    }
+
+   shs->dirty_cbufs = 0;
 }
 
 static void
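diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 9a4d3fa..a9b2499 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c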
@@ -3234,6 +3234,7 @@ iris_set_constant_buffer(struct pipe_context *ctx,
          if (cbuf->buffer != input->buffer) {
             ice->state.dirty |= (IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES |
                                  IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES);
+            shs->dirty_cbufs |= 1u << index;
          }
 
          if (take_ownership) {
@@ -7315,6 +7316,7 @@ iris_rebind_buffer(struct iris_context *ice,
 
             if (res->bo == iris_resource_bo(cbuf->buffer)) {
                pipe_resource_reference(&surf_state->res, NULL);
+               shs->dirty_cbufs |= 1u << i;
                ice->state.dirty |= (IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES |
                                     IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES);
                ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_VS << s;