intel/fs: report max register pressure in shader stats
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fri, 3 Feb 2023 16:02:28 +0000 (17:02 +0100)
committer Marge Bot <emma+marge@anholt.net>
Wed, 8 Mar 2023 13:37:07 +0000 (13:37 +0000)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21756>

14 files changed:
src/intel/compiler/brw_compiler.h
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs.h
src/intel/compiler/brw_fs_generator.cpp
src/intel/compiler/brw_fs_visitor.cpp
src/intel/compiler/brw_mesh.cpp
src/intel/compiler/brw_shader.cpp
src/intel/compiler/brw_vec4.cpp
src/intel/compiler/brw_vec4_gs_visitor.cpp
src/intel/compiler/brw_vec4_tcs.cpp
src/intel/compiler/test_fs_cmod_propagation.cpp
src/intel/compiler/test_fs_copy_propagation.cpp
src/intel/compiler/test_fs_saturate_propagation.cpp
src/intel/compiler/test_fs_scoreboard.cpp

index 31b2efd..a714aea 100644 (file)
@@ -1702,6 +1702,7 @@ struct brw_compile_stats {
    uint32_t cycles;
    uint32_t spills;
    uint32_t fills;
+   uint32_t max_live_registers;
 };
 
 /** @} */
index 8fc28a5..b2739dc 100644 (file)
@@ -6456,6 +6456,18 @@ fs_visitor::fixup_nomask_control_flow()
    return progress;
 }
 
+uint32_t
+fs_visitor::compute_max_register_pressure()
+{ /* Returns the largest rp.regs_live_at_ip[] entry seen while walking cfg. */
+   const register_pressure &rp = regpressure_analysis.require();
+   uint32_t ip = 0, max_pressure = 0; /* stays 0 if no instruction is visited */
+   foreach_block_and_inst(block, backend_instruction, inst, cfg) {
+      max_pressure = MAX2(max_pressure, rp.regs_live_at_ip[ip]);
+      ip++; /* ip counts visited instructions; it is the table index above */
+   }
+   return max_pressure;
+}
+
 void
 fs_visitor::allocate_registers(bool allow_spilling)
 {
@@ -6475,6 +6487,9 @@ fs_visitor::allocate_registers(bool allow_spilling)
       "lifo"
    };
 
+   if (needs_register_pressure)
+      shader_stats.max_register_pressure = compute_max_register_pressure();
+
    bool spill_all = allow_spilling && INTEL_DEBUG(DEBUG_SPILL_FS);
 
    /* Before we schedule anything, stash off the instruction order as an array
@@ -7443,6 +7458,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
 
    v8 = std::make_unique<fs_visitor>(compiler, params->log_data, mem_ctx, &key->base,
                                      &prog_data->base, nir, 8,
+                                     params->stats != NULL,
                                      debug_enabled);
    if (!v8->run_fs(allow_spilling, false /* do_rep_send */)) {
       params->error_str = ralloc_strdup(mem_ctx, v8->fail_msg);
@@ -7485,6 +7501,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
       /* Try a SIMD16 compile */
       v16 = std::make_unique<fs_visitor>(compiler, params->log_data, mem_ctx, &key->base,
                                          &prog_data->base, nir, 16,
+                                         params->stats != NULL,
                                          debug_enabled);
       v16->import_uniforms(v8.get());
       if (!v16->run_fs(allow_spilling, params->use_rep_send)) {
@@ -7512,6 +7529,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
       /* Try a SIMD32 compile */
       v32 = std::make_unique<fs_visitor>(compiler, params->log_data, mem_ctx, &key->base,
                                          &prog_data->base, nir, 32,
+                                         params->stats != NULL,
                                          debug_enabled);
       v32->import_uniforms(v8.get());
       if (!v32->run_fs(allow_spilling, false)) {
@@ -7789,7 +7807,8 @@ brw_compile_cs(const struct brw_compiler *compiler,
                           key->base.robust_buffer_access);
 
       v[simd] = std::make_unique<fs_visitor>(compiler, params->log_data, mem_ctx, &key->base,
-                                    &prog_data->base, shader, dispatch_width,
+                                             &prog_data->base, shader, dispatch_width,
+                                             params->stats != NULL,
                                              debug_enabled);
 
       const int first = brw_simd_first_compiled(simd_state);
@@ -7922,7 +7941,9 @@ compile_single_bs(const struct brw_compiler *compiler, void *log_data,
 
       v[simd] = std::make_unique<fs_visitor>(compiler, log_data, mem_ctx, &key->base,
                                              &prog_data->base, shader,
-                                             dispatch_width, debug_enabled);
+                                             dispatch_width,
+                                             stats != NULL,
+                                             debug_enabled);
 
       const bool allow_spilling = !brw_simd_any_compiled(simd_state);
       if (v[simd]->run_bs(allow_spilling)) {
index 56b50bd..805c60a 100644 (file)
@@ -83,6 +83,7 @@ struct shader_stats {
    unsigned promoted_constants;
    unsigned spill_count;
    unsigned fill_count;
+   unsigned max_register_pressure;
 };
 
 /** Register numbers for thread payload fields. */
@@ -188,12 +189,14 @@ public:
               struct brw_stage_prog_data *prog_data,
               const nir_shader *shader,
               unsigned dispatch_width,
+              bool needs_register_pressure,
               bool debug_enabled);
    fs_visitor(const struct brw_compiler *compiler, void *log_data,
               void *mem_ctx,
               struct brw_gs_compile *gs_compile,
               struct brw_gs_prog_data *prog_data,
               const nir_shader *shader,
+              bool needs_register_pressure,
               bool debug_enabled);
    void init();
    ~fs_visitor();
@@ -220,6 +223,7 @@ public:
    bool run_mesh(bool allow_spilling);
    void optimize();
    void allocate_registers(bool allow_spilling);
+   uint32_t compute_max_register_pressure();
    bool fixup_sends_duplicate_payload();
    void fixup_3src_null_dest();
    void emit_dummy_memory_fence_before_eot();
@@ -526,6 +530,7 @@ public:
 
    unsigned grf_used;
    bool spilled_any_registers;
+   bool needs_register_pressure;
 
    const unsigned dispatch_width; /**< 8, 16 or 32 */
    unsigned max_dispatch_width;
index 17b123a..7a26bff 100644 (file)
@@ -2537,6 +2537,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
       stats->cycles = perf.latency;
       stats->spills = shader_stats.spill_count;
       stats->fills = shader_stats.fill_count;
+      stats->max_live_registers = shader_stats.max_register_pressure;
    }
 
    return start_offset;
index f43e478..d622966 100644 (file)
@@ -1353,12 +1353,14 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
                        struct brw_stage_prog_data *prog_data,
                        const nir_shader *shader,
                        unsigned dispatch_width,
+                       bool needs_register_pressure,
                        bool debug_enabled)
    : backend_shader(compiler, log_data, mem_ctx, shader, prog_data,
                     debug_enabled),
      key(key), gs_compile(NULL), prog_data(prog_data),
      live_analysis(this), regpressure_analysis(this),
      performance_analysis(this),
+     needs_register_pressure(needs_register_pressure),
      dispatch_width(dispatch_width),
      bld(fs_builder(this, dispatch_width).at_end())
 {
@@ -1370,6 +1372,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
                        struct brw_gs_compile *c,
                        struct brw_gs_prog_data *prog_data,
                        const nir_shader *shader,
+                       bool needs_register_pressure,
                        bool debug_enabled)
    : backend_shader(compiler, log_data, mem_ctx, shader,
                     &prog_data->base.base, debug_enabled),
@@ -1377,6 +1380,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
      prog_data(&prog_data->base.base),
      live_analysis(this), regpressure_analysis(this),
      performance_analysis(this),
+     needs_register_pressure(needs_register_pressure),
      dispatch_width(8),
      bld(fs_builder(this, dispatch_width).at_end())
 {
@@ -1411,10 +1415,7 @@ fs_visitor::init()
    this->last_scratch = 0;
    this->push_constant_loc = NULL;
 
-   this->shader_stats.scheduler_mode = NULL;
-   this->shader_stats.promoted_constants = 0,
-   this->shader_stats.spill_count = 0,
-   this->shader_stats.fill_count = 0,
+   memset(&this->shader_stats, 0, sizeof(this->shader_stats));
 
    this->grf_used = 0;
    this->spilled_any_registers = false;
index f42997d..05bd938 100644 (file)
@@ -334,6 +334,7 @@ brw_compile_task(const struct brw_compiler *compiler,
 
       v[simd] = std::make_unique<fs_visitor>(compiler, params->log_data, mem_ctx, &key->base,
                                              &prog_data->base.base, shader, dispatch_width,
+                                             params->stats != NULL,
                                              debug_enabled);
 
       if (prog_data->base.prog_mask) {
@@ -1042,6 +1043,7 @@ brw_compile_mesh(const struct brw_compiler *compiler,
 
       v[simd] = std::make_unique<fs_visitor>(compiler, params->log_data, mem_ctx, &key->base,
                                              &prog_data->base.base, shader, dispatch_width,
+                                             params->stats != NULL,
                                              debug_enabled);
 
       if (prog_data->base.prog_mask) {
index c6543e8..a870583 100644 (file)
@@ -1394,7 +1394,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
    if (is_scalar) {
       fs_visitor v(compiler, params->log_data, mem_ctx, &key->base,
                    &prog_data->base.base, nir, 8,
-                   debug_enabled);
+                   params->stats != NULL, debug_enabled);
       if (!v.run_tes()) {
          params->error_str = ralloc_strdup(mem_ctx, v.fail_msg);
          return NULL;
index 64e6f29..eda343b 100644 (file)
@@ -2642,7 +2642,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
 
       fs_visitor v(compiler, params->log_data, mem_ctx, &key->base,
                    &prog_data->base.base, nir, 8,
-                   debug_enabled);
+                   params->stats != NULL, debug_enabled);
       if (!v.run_vs()) {
          params->error_str = ralloc_strdup(mem_ctx, v.fail_msg);
          return NULL;
index ac99858..d3fc8bb 100644 (file)
@@ -820,7 +820,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
 
    if (is_scalar) {
       fs_visitor v(compiler, params->log_data, mem_ctx, &c, prog_data, nir,
-                   debug_enabled);
+                   params->stats != NULL, debug_enabled);
       if (v.run_gs()) {
          prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
          prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs;
index f5e5954..d0dcaff 100644 (file)
@@ -449,7 +449,8 @@ brw_compile_tcs(const struct brw_compiler *compiler,
 
    if (is_scalar) {
       fs_visitor v(compiler, params->log_data, mem_ctx, &key->base,
-                   &prog_data->base.base, nir, 8, debug_enabled);
+                   &prog_data->base.base, nir, 8, params->stats != NULL,
+                   debug_enabled);
       if (!v.run_tcs()) {
          params->error_str = ralloc_strdup(mem_ctx, v.fail_msg);
          return NULL;
index b7812c3..cfc99a9 100644 (file)
@@ -60,7 +60,7 @@ public:
                                struct brw_wm_prog_data *prog_data,
                                nir_shader *shader)
       : fs_visitor(compiler, NULL, mem_ctx, NULL,
-                   &prog_data->base, shader, 8, false) {}
+                   &prog_data->base, shader, 8, false, false) {}
 };
 
 
index 0e7ab6c..2de1347 100644 (file)
@@ -49,7 +49,7 @@ public:
                                struct brw_wm_prog_data *prog_data,
                                nir_shader *shader)
       : fs_visitor(compiler, NULL, mem_ctx, NULL,
-                   &prog_data->base, shader, 8, false) {}
+                   &prog_data->base, shader, 8, false, false) {}
 };
 
 
index c6560a0..5c85c01 100644 (file)
@@ -49,7 +49,7 @@ public:
                                    struct brw_wm_prog_data *prog_data,
                                    nir_shader *shader)
       : fs_visitor(compiler, NULL, mem_ctx, NULL,
-                   &prog_data->base, shader, 16, false) {}
+                   &prog_data->base, shader, 16, false, false) {}
 };
 
 
index adfa09b..39b2b85 100644 (file)
@@ -52,7 +52,8 @@ void scoreboard_test::SetUp()
    nir_shader *shader =
       nir_shader_create(ctx, MESA_SHADER_FRAGMENT, NULL, NULL);
 
-   v = new fs_visitor(compiler, NULL, ctx, NULL, &prog_data->base, shader, 8, false);
+   v = new fs_visitor(compiler, NULL, ctx, NULL, &prog_data->base, shader, 8,
+                      false, false);
 
    devinfo->ver = 12;
    devinfo->verx10 = devinfo->ver * 10;