* fit. For smaller workgroup sizes, we follow the blob and use the
* smaller threadsize.
*/
- if (compiler->gpu_id < 600) {
+ if (compiler->gen < 6) {
return v->local_size_variable ||
threads_per_wg >
compiler->threadsize_base * compiler->max_waves;
*/
unsigned regs_count =
info->max_reg + 1 +
- (compiler->gpu_id >= 600 ? ((info->max_half_reg + 2) / 2) : 0);
+ (compiler->gen >= 6 ? ((info->max_half_reg + 2) / 2) : 0);
info->double_threadsize = ir3_should_double_threadsize(v, regs_count);
unsigned reg_independent_max_waves =
* same block (since we can't propagate address register values
* across blocks currently)
*/
- if (compiler->gpu_id < 600)
+ if (compiler->gen < 6)
return false;
/* NOTE in the special try_swap_mad_two_srcs() case we can be
compiler->dev = dev;
compiler->gpu_id = gpu_id;
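+ /* Major generation, ie. gpu_id 630 -> gen 6, 540 -> gen 5: */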
+ compiler->gen = gpu_id / 100;
compiler->robust_ubo_access = robust_ubo_access;
/* All known GPUs have 32k local memory (aka shared) */
compiler->wave_granularity = 2;
compiler->max_waves = 16;
- if (compiler->gpu_id >= 600) {
+ if (compiler->gen >= 6) {
compiler->samgq_workaround = true;
/* a6xx split the pipeline state into geometry and fragment state, in
* order to let the VS run ahead of the FS. As a result there are now
compiler->max_const_safe = 256;
}
- if (compiler->gpu_id >= 600) {
+ if (compiler->gen >= 6) {
compiler->reg_size_vec4 =
fd_dev_info(compiler->gpu_id)->a6xx.reg_size_vec4;
- } else if (compiler->gpu_id >= 400) {
+ } else if (compiler->gen >= 4) {
/* On a4xx-a5xx, using r24.x and above requires using the smallest
* threadsize.
*/
compiler->reg_size_vec4 = 96;
}
- if (compiler->gpu_id >= 600) {
+ if (compiler->gen >= 6) {
compiler->threadsize_base = 64;
- } else if (compiler->gpu_id >= 400) {
+ } else if (compiler->gen >= 4) {
/* TODO: Confirm this for a4xx. For a5xx this is based on the Vulkan
* 1.1 subgroupSize which is 32.
*/
compiler->threadsize_base = 32;
} else {
compiler->threadsize_base = 8;
}
- if (compiler->gpu_id >= 400) {
+ if (compiler->gen >= 4) {
/* need special handling for "flat" */
compiler->flat_bypass = true;
compiler->levels_add_one = false;
struct ir3_compiler {
struct fd_device *dev;
uint32_t gpu_id;
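+ /* major GPU generation, ie. gpu_id / 100 (6 for a6xx) */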
+ uint8_t gen;
uint32_t shader_count;
struct disk_cache *disk_cache;
struct ir3_block *b = ctx->block;
struct ir3_instruction *barrier = ir3_BAR(b);
barrier->cat7.g = true;
- if (ctx->compiler->gpu_id < 600)
+ if (ctx->compiler->gen < 6)
barrier->cat7.l = true;
barrier->flags = IR3_INSTR_SS | IR3_INSTR_SY;
barrier->barrier_class = IR3_BARRIER_EVERYTHING;
barrier->cat7.g = true;
}
- if (ctx->compiler->gpu_id > 600) {
+ if (ctx->compiler->gen >= 6) {
if (modes & nir_var_mem_ssbo) {
barrier->cat7.l = true;
}
case nir_intrinsic_memory_barrier_buffer:
barrier = ir3_FENCE(b);
barrier->cat7.g = true;
- if (ctx->compiler->gpu_id > 600)
+ if (ctx->compiler->gen >= 6)
barrier->cat7.l = true;
barrier->cat7.r = true;
barrier->cat7.w = true;
break;
case nir_intrinsic_memory_barrier_shared:
barrier = ir3_FENCE(b);
- if (ctx->compiler->gpu_id < 600)
+ if (ctx->compiler->gen < 6)
barrier->cat7.l = true;
barrier->cat7.r = true;
barrier->cat7.w = true;
sysval = SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL;
break;
case SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID:
- if (ctx->compiler->gpu_id < 600)
+ if (ctx->compiler->gen < 6)
sysval = SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL;
break;
case SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE:
sysval = SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL;
break;
case SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID:
- if (ctx->compiler->gpu_id < 600)
+ if (ctx->compiler->gen < 6)
sysval = SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL;
break;
default:
* out, we guarantee that all exit paths flow into the stream-
* out instructions.
*/
- if ((ctx->compiler->gpu_id < 500) &&
+ if ((ctx->compiler->gen < 5) &&
(ctx->so->shader->stream_output.num_outputs > 0) &&
!ctx->so->binning_pass) {
debug_assert(ctx->so->type == MESA_SHADER_VERTEX);
* need to make sure not to remove any inputs that are used by
* the nonbinning VS.
*/
- if (ctx->compiler->gpu_id >= 600 && so->binning_pass &&
+ if (ctx->compiler->gen >= 6 && so->binning_pass &&
so->type == MESA_SHADER_VERTEX) {
for (int i = 0; i < ctx->ninputs; i++) {
struct ir3_instruction *in = ctx->inputs[i];
array_insert(ctx->block, ctx->block->keeps, end);
/* at this point, for binning pass, throw away unneeded outputs: */
- if (so->binning_pass && (ctx->compiler->gpu_id < 600))
+ if (so->binning_pass && (ctx->compiler->gen < 6))
fixup_binning_pass(ctx, end);
}
* that the uniform/constant layout for BS and VS matches, so that
* we can re-use the same VS_CONST state group.
*/
- if (so->binning_pass && (ctx->compiler->gpu_id >= 600)) {
+ if (so->binning_pass && (ctx->compiler->gen >= 6)) {
fixup_binning_pass(ctx, find_end(ctx->so->ir));
/* cleanup the result of removing unneeded outputs: */
while (IR3_PASS(ir, ir3_dce, so)) {
*
* Note that VS inputs are expected to be full precision.
*/
- bool pre_assign_inputs = (ir->compiler->gpu_id >= 600) &&
+ bool pre_assign_inputs = (ir->compiler->gen >= 6) &&
(ir->type == MESA_SHADER_VERTEX) &&
so->binning_pass;
{
struct ir3_context *ctx = rzalloc(NULL, struct ir3_context);
- if (compiler->gpu_id >= 400) {
+ if (compiler->gen >= 4) {
if (so->type == MESA_SHADER_VERTEX) {
ctx->astc_srgb = so->key.vastc_srgb;
} else if (so->type == MESA_SHADER_FRAGMENT) {
}
}
- if (compiler->gpu_id >= 600) {
+ if (compiler->gen >= 6) {
ctx->funcs = &ir3_a6xx_funcs;
- } else if (compiler->gpu_id >= 400) {
+ } else if (compiler->gen >= 4) {
ctx->funcs = &ir3_a4xx_funcs;
}
/* Enable the texture pre-fetch feature only from a4xx onwards. But
* only enable it on generations that have been tested:
*/
- if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gpu_id >= 600))
+ if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gen >= 6))
NIR_PASS_V(ctx->s, ir3_nir_lower_tex_prefetch);
NIR_PASS(progress, ctx->s, nir_lower_phis_to_scalar, true);
regmask_set(&state->needs_sy, n->dsts[0]);
} else if (is_atomic(n->opc)) {
if (n->flags & IR3_INSTR_G) {
- if (ctx->compiler->gpu_id >= 600) {
+ if (ctx->compiler->gen >= 6) {
/* New encoding, returns result via second src: */
regmask_set(&state->needs_sy, n->srcs[2]);
} else {
* a6xx.
*/
- if ((delay > 0) && (ir->compiler->gpu_id >= 600) && last &&
+ if ((delay > 0) && (ir->compiler->gen >= 6) && last &&
((opc_cat(last->opc) == 2) || (opc_cat(last->opc) == 3)) &&
(last->repeat == 0)) {
/* the previous cat2/cat3 instruction can encode at most 3 nops: */
}
}
- assert(ctx->early_input_release || ctx->compiler->gpu_id > 500);
+ assert(ctx->early_input_release || ctx->compiler->gen >= 5);
/* process each block: */
do {
/* a5xx+ is known to support swz, which enables us to swap two registers
* in-place. If unsupported we emulate it using the xor trick.
*/
- if (compiler->gpu_id < 500) {
+ if (compiler->gen < 5) {
/* Shared regs only exist since a5xx, so we don't have to provide a
* fallback path for them.
*/
const nir_shader_compiler_options *
ir3_get_compiler_options(struct ir3_compiler *compiler)
{
- if (compiler->gpu_id >= 600)
+ if (compiler->gen >= 6)
return &options_a6xx;
return &options;
}
.lower_tg4_offsets = true,
};
- if (compiler->gpu_id >= 400) {
+ if (compiler->gen >= 4) {
/* a4xx seems to have *no* sam.p */
tex_options.lower_txp = ~0; /* lower all txp */
} else {
OPT_V(s, nir_lower_tex, &tex_options);
OPT_V(s, nir_lower_load_const_to_scalar);
- if (compiler->gpu_id < 500)
+ if (compiler->gen < 5)
OPT_V(s, ir3_nir_lower_tg4_to_tex);
ir3_optimize_loop(compiler, s);
NIR_PASS_V(s, nir_lower_fb_read);
}
- if (compiler->gpu_id >= 600 && s->info.stage == MESA_SHADER_FRAGMENT &&
+ if (compiler->gen >= 6 && s->info.stage == MESA_SHADER_FRAGMENT &&
!(ir3_shader_debug & IR3_DBG_NOFP16)) {
NIR_PASS_V(s, nir_lower_mediump_io, nir_var_shader_out, 0, false);
}
/* UBO offset lowering has to come after we've decided what will
* be left as load_ubo
*/
- if (so->shader->compiler->gpu_id >= 600)
+ if (so->shader->compiler->gen >= 6)
progress |= OPT(s, nir_lower_ubo_vec4);
OPT_V(s, ir3_nir_lower_io_offsets, so->shader->compiler->gpu_id);
ir3_nir_scan_driver_consts(nir, const_state);
- if ((compiler->gpu_id < 500) && (v->shader->stream_output.num_outputs > 0)) {
+ if ((compiler->gen < 5) && (v->shader->stream_output.num_outputs > 0)) {
const_state->num_driver_params =
MAX2(const_state->num_driver_params, IR3_DP_VTXCNT_MAX + 1);
}
if (const_state->num_driver_params > 0) {
/* offset cannot be 0 for vs params loaded by CP_DRAW_INDIRECT_MULTI */
- if (v->type == MESA_SHADER_VERTEX && compiler->gpu_id >= 600)
+ if (v->type == MESA_SHADER_VERTEX && compiler->gen >= 6)
constoff = MAX2(constoff, 1);
const_state->offsets.driver_param = constoff;
}
constoff += const_state->num_driver_params / 4;
- if ((v->type == MESA_SHADER_VERTEX) && (compiler->gpu_id < 500) &&
+ if ((v->type == MESA_SHADER_VERTEX) && (compiler->gen < 5) &&
v->shader->stream_output.num_outputs > 0) {
const_state->offsets.tfbo = constoff;
constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
* uploads are in units of 4 dwords. Round it up here to make calculations
* regarding the shared constlen simpler.
*/
- if (compiler->gpu_id >= 400)
+ if (compiler->gen >= 4)
v->constlen = align(v->constlen, 4);
/* Use the per-wave layout by default on a6xx for compute shaders. It
* should result in better performance when loads/stores are to a uniform
* index.
*/
- v->pvtmem_per_wave = compiler->gpu_id >= 600 && !info->multi_dword_ldp_stp &&
+ v->pvtmem_per_wave = compiler->gen >= 6 && !info->multi_dword_ldp_stp &&
v->type == MESA_SHADER_COMPUTE;
fixup_regfootprint(v);
v->nonbinning = nonbinning;
v->key = *key;
v->type = shader->type;
- v->mergedregs = shader->compiler->gpu_id >= 600;
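+ /* gen6+ uses the merged half/full register file: */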
+ v->mergedregs = shader->compiler->gen >= 6;
if (!v->binning_pass)
v->const_state = rzalloc_size(v, sizeof(*v->const_state));
* on older HW.
*/
key->msaa = info->fs.uses_sample_qualifier ||
- (shader->compiler->gpu_id < 600 &&
+ (shader->compiler->gen < 6 &&
(BITSET_TEST(info->system_values_read,
SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID) ||
BITSET_TEST(info->system_values_read,
* a6xx and the total limit. The frag limit on a6xx only matters for a
* single stage, so it's always satisfied with the first variant.
*/
- if (compiler->gpu_id >= 600) {
+ if (compiler->gen >= 6) {
trimmed |=
trim_constlens(constlens, MESA_SHADER_VERTEX, MESA_SHADER_GEOMETRY,
compiler->max_const_geom, compiler->max_const_safe);
if (!v->shader)
return 0;
- if (v->shader->compiler->gpu_id < 500)
+ if (v->shader->compiler->gen < 5)
return v->branchstack;
if (v->branchstack > 0) {
}
} else if (instr->opc == OPC_DEMOTE) {
return OPC_KILL;
- } else if ((instr->block->shader->compiler->gpu_id > 600) &&
+ } else if ((instr->block->shader->compiler->gen >= 6) &&
is_atomic(instr->opc) && (instr->flags & IR3_INSTR_G)) {
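+ /* relies on OPC_ATOMIC_B_* being laid out in the same order as OPC_ATOMIC_*: */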
return instr->opc - OPC_ATOMIC_ADD + OPC_ATOMIC_B_ADD;
- } else if (s->compiler->gpu_id >= 600) {
+ } else if (s->compiler->gen >= 6) {
if (instr->opc == OPC_RESINFO) {
return OPC_RESINFO_B;
} else if (instr->opc == OPC_LDIB) {