panfrost: Fix uniform_count on Midgard

author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>

Wed, 17 Feb 2021 20:36:13 +0000 (15:36 -0500)

committer Marge Bot <eric+marge@anholt.net>

Thu, 18 Feb 2021 16:06:36 +0000 (16:06 +0000)
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Wed, 17 Feb 2021 20:36:13 +0000 (15:36 -0500)
committer Marge Bot <eric+marge@anholt.net>
Thu, 18 Feb 2021 16:06:36 +0000 (16:06 +0000)
diff --git a/src/panfrost/lib/pan_shader.h b/src/panfrost/lib/pan_shader.h

index 43f6733..7d36d47 100644 (file)
--- a/src/panfrost/lib/pan_shader.h
+++ b/src/panfrost/lib/pan_shader.h
@@ -47,8 +47,10 @@ static inline void
  pan_shader_prepare_midgard_rsd(const struct pan_shader_info *info,
                                 struct MALI_RENDERER_STATE *rsd)
  {
+        assert((info->push.count & 3) == 0);
+
          rsd->properties.uniform_buffer_count = info->ubo_count;
-        rsd->properties.midgard.uniform_count = info->midgard.uniform_cutoff;
+        rsd->properties.midgard.uniform_count = info->push.count / 4;
          rsd->properties.midgard.shader_has_side_effects = info->writes_global;
          rsd->properties.midgard.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED;
  
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h

index f804ee0..23ca0e8 100644 (file)
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -289,9 +289,6 @@ typedef struct compiler_context {
          /* Set of NIR indices that were already emitted as outmods */
          BITSET_WORD *already_emitted;
  
-        /* The number of uniforms allowable for the fast path */
-        int uniform_cutoff;
-
          /* Count of instructions emitted from NIR overall, across all blocks */
          int instruction_count;
  
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c

index 7f6c18a..38efe98 100644 (file)
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -3008,11 +3008,6 @@ midgard_compile_shader_nir(nir_shader *nir,
          ctx->blend_src1 = ~0;
          ctx->quirks = midgard_get_quirks(inputs->gpu_id);
  
-        /* Start off with a safe cutoff, allowing usage of all 16 work
-         * registers. Later, we'll promote uniform reads to uniform registers
-         * if we determine it is beneficial to do so */
-        info->midgard.uniform_cutoff = 8;
-
          /* Initialize at a global (not block) level hash tables */
  
          ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);
diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c

index 44b3c7d..d3757b2 100644 (file)
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@@ -391,11 +391,11 @@ mir_is_64(midgard_instruction *ins)
  static struct lcra_state *
  allocate_registers(compiler_context *ctx, bool *spilled)
  {
-        /* The number of vec4 work registers available depends on when the
-         * uniforms start and the shader stage. By ABI we limit blend shaders
-         * to 8 registers, should be lower XXX */
-        int work_count = ctx->inputs->is_blend ? 8 :
-                16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
+        /* The number of vec4 work registers available depends on the number of
+         * register-mapped uniforms and the shader stage. By ABI we limit blend
+         * shaders to 8 registers, should be lower XXX */
+        unsigned rmu = ctx->info->push.count / 4;
+        int work_count = ctx->inputs->is_blend ? 8 : 16 - MAX2(rmu - 8, 0);
  
         /* No register allocation to do with no SSA */
  
@@ -959,15 +959,13 @@ mir_spill_register(
  static void
  mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
  {
-        unsigned old_work_count =
-                16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
+        unsigned uniforms = ctx->info->push.count / 4;
+        unsigned old_work_count = 16 - MAX2(uniforms - 8, 0);
          unsigned work_count = 16 - MAX2((new_cutoff - 8), 0);
  
          unsigned min_demote = SSA_FIXED_REGISTER(old_work_count);
          unsigned max_demote = SSA_FIXED_REGISTER(work_count);
  
-        ctx->info->midgard.uniform_cutoff = new_cutoff;
-
          mir_foreach_block(ctx, _block) {
                  midgard_block *block = (midgard_block *) _block;
                  mir_foreach_instr_in_block(block, ins) {
@@ -1002,6 +1000,8 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
                          }
                  }
          }
+
+        ctx->info->push.count = MIN2(ctx->info->push.count, new_cutoff * 4);
  }
  
  /* Run register allocation in a loop, spilling until we succeed */
@@ -1022,13 +1022,12 @@ mir_ra(compiler_context *ctx)
          do {
                  if (spilled) {
                          signed spill_node = mir_choose_spill_node(ctx, l);
+                        unsigned uniforms = ctx->info->push.count / 4;
  
                          /* It's a lot cheaper to demote uniforms to get more
                           * work registers than to spill to TLS. */
-                        if (l->spill_class == REG_CLASS_WORK &&
-                            ctx->info->midgard.uniform_cutoff > 8) {
-
-                                mir_demote_uniforms(ctx, MAX2(ctx->info->midgard.uniform_cutoff - 4, 8));
+                        if (l->spill_class == REG_CLASS_WORK && uniforms > 8) {
+                                mir_demote_uniforms(ctx, MAX2(uniforms - 4, 8));
                          } else if (spill_node == -1) {
                                  fprintf(stderr, "ERROR: Failed to choose spill node\n");
                                  lcra_free(l);
diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c

index 744d88e..19525ef 100644 (file)
--- a/src/panfrost/midgard/mir_promote_uniforms.c
+++ b/src/panfrost/midgard/mir_promote_uniforms.c
@@ -263,7 +263,9 @@ midgard_promote_uniforms(compiler_context *ctx)
          unsigned work_count = mir_work_heuristic(ctx, &analysis);
          unsigned promoted_count = 24 - work_count;
  
+        /* Ensure we are 16 byte aligned to avoid underallocations */
          mir_pick_ubo(&ctx->info->push, &analysis, promoted_count);
+        ctx->info->push.count = ALIGN_POT(ctx->info->push.count, 4);
  
          /* First, figure out special indices a priori so we don't recompute a lot */
          BITSET_WORD *special = mir_special_indices(ctx);
@@ -287,9 +289,6 @@ midgard_promote_uniforms(compiler_context *ctx)
  
                  /* Should've taken into account when pushing */
                  assert(address < promoted_count);
-
-                ctx->info->midgard.uniform_cutoff =
-                        MAX2(ctx->info->midgard.uniform_cutoff, address + 1);
                  unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
  
                  /* We do need the move for safety for a non-SSA dest, or if
diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h

index 1c09479..b8925a3 100644 (file)
--- a/src/panfrost/util/pan_ir.h
+++ b/src/panfrost/util/pan_ir.h
@@ -145,7 +145,6 @@ struct bifrost_shader_info {
  };
  
  struct midgard_shader_info {
-        unsigned uniform_cutoff;
          unsigned first_tag;
  };
author	Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
	Wed, 17 Feb 2021 20:36:13 +0000 (15:36 -0500)
committer	Marge Bot <eric+marge@anholt.net>
	Thu, 18 Feb 2021 16:06:36 +0000 (16:06 +0000)
src/panfrost/lib/pan_shader.h		patch \| blob \| history
src/panfrost/midgard/compiler.h		patch \| blob \| history
src/panfrost/midgard/midgard_compile.c		patch \| blob \| history
src/panfrost/midgard/midgard_ra.c		patch \| blob \| history
src/panfrost/midgard/mir_promote_uniforms.c		patch \| blob \| history
src/panfrost/util/pan_ir.h		patch \| blob \| history