iris: Skip mi_builder init for indirect draws
authorKenneth Graunke <kenneth@whitecape.org>
Fri, 22 Dec 2023 12:48:20 +0000 (04:48 -0800)
committerEric Engestrom <eric@engestrom.ch>
Wed, 27 Dec 2023 13:39:30 +0000 (13:39 +0000)
We only need it for indirect draws.

Improves performance on an i7-12700 and A770:

- Piglit's drawoverhead base case +150.639% +/- 2.86933% (n=15).
- gfxbench5 gl_driver2_off +19.7219% +/- 1.13778% (n=15)
- SPECviewperf2020 catiav5test1 +1.6831% +/- 0.552052% (n=10).

Cc: mesa-stable
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26806>
(cherry picked from commit 55c262898ae7188311c89a60e4ec0fbb67b7a95b)

.pick_status.json
src/gallium/drivers/iris/iris_state.c

index f495786..0c787e9 100644 (file)
@@ -4,7 +4,7 @@
         "description": "iris: Skip mi_builder init for indirect draws",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null,
         "notes": null
index 33301e5..7f7d387 100644 (file)
@@ -7869,6 +7869,11 @@ iris_upload_render_state(struct iris_context *ice,
 #endif
    }
 
+   if (indirect) {
+      struct mi_builder b;
+      uint32_t mocs;
+      mi_builder_init(&b, batch->screen->devinfo, batch);
+
 #define _3DPRIM_END_OFFSET          0x2420
 #define _3DPRIM_START_VERTEX        0x2430
 #define _3DPRIM_VERTEX_COUNT        0x2434
@@ -7876,103 +7881,100 @@ iris_upload_render_state(struct iris_context *ice,
 #define _3DPRIM_START_INSTANCE      0x243C
 #define _3DPRIM_BASE_VERTEX         0x2440
 
-   struct mi_builder b;
-   uint32_t mocs;
-   mi_builder_init(&b, batch->screen->devinfo, batch);
+      if (!indirect->count_from_stream_output) {
+         if (indirect->indirect_draw_count) {
+            use_predicate = true;
+
+            struct iris_bo *draw_count_bo =
+               iris_resource_bo(indirect->indirect_draw_count);
+            unsigned draw_count_offset =
+               indirect->indirect_draw_count_offset;
+            mocs = iris_mocs(draw_count_bo, &batch->screen->isl_dev, 0);
+            mi_builder_set_mocs(&b, mocs);
+
+            if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
+               /* comparison = draw id < draw count */
+               struct mi_value comparison =
+                  mi_ult(&b, mi_imm(drawid_offset),
+                             mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
+
+               /* predicate = comparison & conditional rendering predicate */
+               mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
+                            mi_iand(&b, comparison, mi_reg32(CS_GPR(15))));
+            } else {
+               uint32_t mi_predicate;
 
-   if (indirect && !indirect->count_from_stream_output) {
-      if (indirect->indirect_draw_count) {
-         use_predicate = true;
+               /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
+               mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(drawid_offset));
+               /* Upload the current draw count from the draw parameters buffer
+                * to MI_PREDICATE_SRC0. Zero the top 32-bits of
+                * MI_PREDICATE_SRC0.
+                */
+               mi_store(&b, mi_reg64(MI_PREDICATE_SRC0),
+                        mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
+
+               if (drawid_offset == 0) {
+                  mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
+                                 MI_PREDICATE_COMBINEOP_SET |
+                                 MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
+               } else {
+                  /* While draw_index < draw_count the predicate's result will be
+                   *  (draw_index == draw_count) ^ TRUE = TRUE
+                   * When draw_index == draw_count the result is
+                   *  (TRUE) ^ TRUE = FALSE
+                   * After this all results will be:
+                   *  (FALSE) ^ FALSE = FALSE
+                   */
+                  mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
+                                 MI_PREDICATE_COMBINEOP_XOR |
+                                 MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
+               }
+               iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
+            }
+         }
+         struct iris_bo *bo = iris_resource_bo(indirect->buffer);
+         assert(bo);
 
-         struct iris_bo *draw_count_bo =
-            iris_resource_bo(indirect->indirect_draw_count);
-         unsigned draw_count_offset =
-            indirect->indirect_draw_count_offset;
-         mocs = iris_mocs(draw_count_bo, &batch->screen->isl_dev, 0);
+         mocs = iris_mocs(bo, &batch->screen->isl_dev, 0);
          mi_builder_set_mocs(&b, mocs);
 
-         if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
-            /* comparison = draw id < draw count */
-            struct mi_value comparison =
-               mi_ult(&b, mi_imm(drawid_offset),
-                          mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
-
-            /* predicate = comparison & conditional rendering predicate */
-            mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
-                         mi_iand(&b, comparison, mi_reg32(CS_GPR(15))));
+         mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
+                  mi_mem32(ro_bo(bo, indirect->offset + 0)));
+         mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT),
+                  mi_mem32(ro_bo(bo, indirect->offset + 4)));
+         mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX),
+                  mi_mem32(ro_bo(bo, indirect->offset + 8)));
+         if (draw->index_size) {
+            mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX),
+                     mi_mem32(ro_bo(bo, indirect->offset + 12)));
+            mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE),
+                     mi_mem32(ro_bo(bo, indirect->offset + 16)));
          } else {
-            uint32_t mi_predicate;
+            mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE),
+                     mi_mem32(ro_bo(bo, indirect->offset + 12)));
+            mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), mi_imm(0));
+         }
+      } else if (indirect->count_from_stream_output) {
+         struct iris_stream_output_target *so =
+            (void *) indirect->count_from_stream_output;
+         struct iris_bo *so_bo = iris_resource_bo(so->offset.res);
 
-            /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
-            mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(drawid_offset));
-            /* Upload the current draw count from the draw parameters buffer
-             * to MI_PREDICATE_SRC0. Zero the top 32-bits of
-             * MI_PREDICATE_SRC0.
-             */
-            mi_store(&b, mi_reg64(MI_PREDICATE_SRC0),
-                     mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
+         mocs = iris_mocs(so_bo, &batch->screen->isl_dev, 0);
+         mi_builder_set_mocs(&b, mocs);
 
-            if (drawid_offset == 0) {
-               mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
-                              MI_PREDICATE_COMBINEOP_SET |
-                              MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
-            } else {
-               /* While draw_index < draw_count the predicate's result will be
-                *  (draw_index == draw_count) ^ TRUE = TRUE
-                * When draw_index == draw_count the result is
-                *  (TRUE) ^ TRUE = FALSE
-                * After this all results will be:
-                *  (FALSE) ^ FALSE = FALSE
-                */
-               mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
-                              MI_PREDICATE_COMBINEOP_XOR |
-                              MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
-            }
-            iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
-         }
-      }
-      struct iris_bo *bo = iris_resource_bo(indirect->buffer);
-      assert(bo);
-
-      mocs = iris_mocs(bo, &batch->screen->isl_dev, 0);
-      mi_builder_set_mocs(&b, mocs);
-
-      mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
-               mi_mem32(ro_bo(bo, indirect->offset + 0)));
-      mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT),
-               mi_mem32(ro_bo(bo, indirect->offset + 4)));
-      mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX),
-               mi_mem32(ro_bo(bo, indirect->offset + 8)));
-      if (draw->index_size) {
-         mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX),
-                  mi_mem32(ro_bo(bo, indirect->offset + 12)));
-         mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE),
-                  mi_mem32(ro_bo(bo, indirect->offset + 16)));
-      } else {
-         mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE),
-                  mi_mem32(ro_bo(bo, indirect->offset + 12)));
+         iris_emit_buffer_barrier_for(batch, so_bo, IRIS_DOMAIN_OTHER_READ);
+
+         struct iris_address addr = ro_bo(so_bo, so->offset.offset);
+         struct mi_value offset =
+            mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset);
+         mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
+                      mi_udiv32_imm(&b, offset, so->stride));
+         mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX), mi_imm(0));
          mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), mi_imm(0));
+         mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), mi_imm(0));
+         mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT),
+                  mi_imm(draw->instance_count));
       }
-   } else if (indirect && indirect->count_from_stream_output) {
-      struct iris_stream_output_target *so =
-         (void *) indirect->count_from_stream_output;
-      struct iris_bo *so_bo = iris_resource_bo(so->offset.res);
-
-      mocs = iris_mocs(so_bo, &batch->screen->isl_dev, 0);
-      mi_builder_set_mocs(&b, mocs);
-
-      iris_emit_buffer_barrier_for(batch, so_bo, IRIS_DOMAIN_OTHER_READ);
-
-      struct iris_address addr = ro_bo(so_bo, so->offset.offset);
-      struct mi_value offset =
-         mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset);
-      mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
-                   mi_udiv32_imm(&b, offset, so->stride));
-      mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX), mi_imm(0));
-      mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), mi_imm(0));
-      mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), mi_imm(0));
-      mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT),
-               mi_imm(draw->instance_count));
    }
 
    iris_measure_snapshot(ice, batch, INTEL_SNAPSHOT_DRAW, draw, indirect, sc);