From: Rob Clark
Date: Sat, 31 Jul 2021 19:32:28 +0000 (-0700)
Subject: freedreno/ir3: Reduce use of compiler->gpu_id
X-Git-Tag: upstream/22.3.5~19489
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=cc72eeb07761cfeb03674e97d2ebf1adfc7f0397;p=platform%2Fupstream%2Fmesa.git

freedreno/ir3: Reduce use of compiler->gpu_id

For the same reason as previous patch.  Mostly we only care about the
generation, so convert things to use compiler->gen instead.

Signed-off-by: Rob Clark
Part-of:
---

diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 41d1039..1491429 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -135,7 +135,7 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count)
     * fit. For smaller workgroup sizes, we follow the blob and use the
     * smaller threadsize.
     */
-   if (compiler->gpu_id < 600) {
+   if (compiler->gen < 6) {
       return v->local_size_variable ||
              threads_per_wg > compiler->threadsize_base * compiler->max_waves;
@@ -317,7 +317,7 @@ ir3_collect_info(struct ir3_shader_variant *v)
     */
    unsigned regs_count =
       info->max_reg + 1 +
-      (compiler->gpu_id >= 600 ? ((info->max_half_reg + 2) / 2) : 0);
+      (compiler->gen >= 6 ? ((info->max_half_reg + 2) / 2) : 0);
 
    info->double_threadsize = ir3_should_double_threadsize(v, regs_count);
    unsigned reg_independent_max_waves =
@@ -781,7 +781,7 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
        * same block (since we can't propagate address register values
        * across blocks currently)
        */
-      if (compiler->gpu_id < 600)
+      if (compiler->gen < 6)
          return false;
 
       /* NOTE in the special try_swap_mad_two_srcs() case we can be
diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c
index e3a9b35..edcd305 100644
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -84,6 +84,7 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id,
 
    compiler->dev = dev;
    compiler->gpu_id = gpu_id;
+   compiler->gen = gpu_id / 100;
    compiler->robust_ubo_access = robust_ubo_access;
 
    /* All known GPU's have 32k local memory (aka shared) */
@@ -93,7 +94,7 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id,
    compiler->wave_granularity = 2;
    compiler->max_waves = 16;
 
-   if (compiler->gpu_id >= 600) {
+   if (compiler->gen >= 6) {
       compiler->samgq_workaround = true;
       /* a6xx split the pipeline state into geometry and fragment state, in
        * order to let the VS run ahead of the FS. As a result there are now
@@ -136,10 +137,10 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id,
       compiler->max_const_safe = 256;
    }
 
-   if (compiler->gpu_id >= 600) {
+   if (compiler->gen >= 6) {
       compiler->reg_size_vec4 =
          fd_dev_info(compiler->gpu_id)->a6xx.reg_size_vec4;
-   } else if (compiler->gpu_id >= 400) {
+   } else if (compiler->gen >= 4) {
       /* On a4xx-a5xx, using r24.x and above requires using the smallest
        * threadsize.
        */
@@ -149,9 +150,9 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id,
       compiler->reg_size_vec4 = 96;
    }
 
-   if (compiler->gpu_id >= 600) {
+   if (compiler->gen >= 6) {
       compiler->threadsize_base = 64;
-   } else if (compiler->gpu_id >= 400) {
+   } else if (compiler->gen >= 4) {
       /* TODO: Confirm this for a4xx. For a5xx this is based on the Vulkan
        * 1.1 subgroupSize which is 32.
        */
@@ -160,7 +161,7 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id,
       compiler->threadsize_base = 8;
    }
 
-   if (compiler->gpu_id >= 400) {
+   if (compiler->gen >= 4) {
       /* need special handling for "flat" */
       compiler->flat_bypass = true;
       compiler->levels_add_one = false;
diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h
index 427a3b2..d8e9f0c 100644
--- a/src/freedreno/ir3/ir3_compiler.h
+++ b/src/freedreno/ir3/ir3_compiler.h
@@ -38,6 +38,7 @@ struct ir3_shader;
 struct ir3_compiler {
    struct fd_device *dev;
    uint32_t gpu_id;
+   uint8_t gen;
    uint32_t shader_count;
    struct disk_cache *disk_cache;
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index a5bc577..a660788 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1312,7 +1312,7 @@ emit_control_barrier(struct ir3_context *ctx)
    struct ir3_block *b = ctx->block;
    struct ir3_instruction *barrier = ir3_BAR(b);
    barrier->cat7.g = true;
-   if (ctx->compiler->gpu_id < 600)
+   if (ctx->compiler->gen < 6)
       barrier->cat7.l = true;
    barrier->flags = IR3_INSTR_SS | IR3_INSTR_SY;
    barrier->barrier_class = IR3_BARRIER_EVERYTHING;
@@ -1361,7 +1361,7 @@ emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr)
          barrier->cat7.g = true;
       }
 
-      if (ctx->compiler->gpu_id > 600) {
+      if (ctx->compiler->gen >= 6) {
          if (modes & nir_var_mem_ssbo) {
             barrier->cat7.l = true;
          }
@@ -1407,7 +1407,7 @@ emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr)
    case nir_intrinsic_memory_barrier_buffer:
       barrier = ir3_FENCE(b);
       barrier->cat7.g = true;
-      if (ctx->compiler->gpu_id > 600)
+      if (ctx->compiler->gen >= 6)
          barrier->cat7.l = true;
       barrier->cat7.r = true;
       barrier->cat7.w = true;
@@ -1425,7 +1425,7 @@ emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr)
       break;
    case nir_intrinsic_memory_barrier_shared:
       barrier = ir3_FENCE(b);
-      if (ctx->compiler->gpu_id < 600)
+      if (ctx->compiler->gen < 6)
         barrier->cat7.l = true;
       barrier->cat7.r = true;
       barrier->cat7.w = true;
@@ -1564,14 +1564,14 @@ emit_intrinsic_barycentric(struct ir3_context *ctx, nir_intrinsic_instr *intr,
       sysval = SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL;
       break;
    case SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID:
-      if (ctx->compiler->gpu_id < 600)
+      if (ctx->compiler->gen < 6)
          sysval = SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL;
       break;
    case SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE:
       sysval = SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL;
       break;
    case SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID:
-      if (ctx->compiler->gpu_id < 600)
+      if (ctx->compiler->gen < 6)
         sysval = SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL;
       break;
    default:
@@ -3289,7 +3289,7 @@ emit_function(struct ir3_context *ctx, nir_function_impl *impl)
    * out, we guarantee that all exit paths flow into the stream-
    * out instructions.
    */
-   if ((ctx->compiler->gpu_id < 500) &&
+   if ((ctx->compiler->gen < 5) &&
        (ctx->so->shader->stream_output.num_outputs > 0) &&
        !ctx->so->binning_pass) {
       debug_assert(ctx->so->type == MESA_SHADER_VERTEX);
@@ -4103,7 +4103,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
    * need to make sure not to remove any inputs that are used by
    * the nonbinning VS.
    */
-   if (ctx->compiler->gpu_id >= 600 && so->binning_pass &&
+   if (ctx->compiler->gen >= 6 && so->binning_pass &&
        so->type == MESA_SHADER_VERTEX) {
       for (int i = 0; i < ctx->ninputs; i++) {
          struct ir3_instruction *in = ctx->inputs[i];
@@ -4140,7 +4140,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
       array_insert(ctx->block, ctx->block->keeps, end);
 
       /* at this point, for binning pass, throw away unneeded outputs: */
-      if (so->binning_pass && (ctx->compiler->gpu_id < 600))
+      if (so->binning_pass && (ctx->compiler->gen < 6))
         fixup_binning_pass(ctx, end);
    }
@@ -4163,7 +4163,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
    * that the uniform/constant layout for BS and VS matches, so that
    * we can re-use same VS_CONST state group.
    */
-   if (so->binning_pass && (ctx->compiler->gpu_id >= 600)) {
+   if (so->binning_pass && (ctx->compiler->gen >= 6)) {
       fixup_binning_pass(ctx, find_end(ctx->so->ir));
       /* cleanup the result of removing unneeded outputs: */
       while (IR3_PASS(ir, ir3_dce, so)) {
@@ -4195,7 +4195,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
    *
    * Note that VS inputs are expected to be full precision.
    */
-   bool pre_assign_inputs = (ir->compiler->gpu_id >= 600) &&
+   bool pre_assign_inputs = (ir->compiler->gen >= 6) &&
                             (ir->type == MESA_SHADER_VERTEX) &&
                             so->binning_pass;
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index e534143..c215baa 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -35,7 +35,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader_variant *so)
 {
    struct ir3_context *ctx = rzalloc(NULL, struct ir3_context);
 
-   if (compiler->gpu_id >= 400) {
+   if (compiler->gen >= 4) {
       if (so->type == MESA_SHADER_VERTEX) {
          ctx->astc_srgb = so->key.vastc_srgb;
       } else if (so->type == MESA_SHADER_FRAGMENT) {
@@ -50,9 +50,9 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader_variant *so)
       }
    }
 
-   if (compiler->gpu_id >= 600) {
+   if (compiler->gen >= 6) {
       ctx->funcs = &ir3_a6xx_funcs;
-   } else if (compiler->gpu_id >= 400) {
+   } else if (compiler->gen >= 4) {
       ctx->funcs = &ir3_a4xx_funcs;
    }
 
@@ -109,7 +109,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader_variant *so)
    /* Enable the texture pre-fetch feature only a4xx onwards. But
    * only enable it on generations that have been tested:
    */
-   if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gpu_id >= 600))
+   if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gen >= 6))
       NIR_PASS_V(ctx->s, ir3_nir_lower_tex_prefetch);
 
    NIR_PASS(progress, ctx->s, nir_lower_phis_to_scalar, true);
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index 1e64e10..b2d8ab2 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -273,7 +273,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
          regmask_set(&state->needs_sy, n->dsts[0]);
       } else if (is_atomic(n->opc)) {
          if (n->flags & IR3_INSTR_G) {
-            if (ctx->compiler->gpu_id >= 600) {
+            if (ctx->compiler->gen >= 6) {
               /* New encoding, returns result via second src: */
               regmask_set(&state->needs_sy, n->srcs[2]);
            } else {
@@ -801,7 +801,7 @@ nop_sched(struct ir3 *ir, struct ir3_shader_variant *so)
       * a6xx.
       */
-      if ((delay > 0) && (ir->compiler->gpu_id >= 600) && last &&
+      if ((delay > 0) && (ir->compiler->gen >= 6) && last &&
          ((opc_cat(last->opc) == 2) || (opc_cat(last->opc) == 3)) &&
          (last->repeat == 0)) {
         /* the previous cat2/cat3 instruction can encode at most 3 nop's: */
@@ -870,7 +870,7 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
       }
    }
 
-   assert(ctx->early_input_release || ctx->compiler->gpu_id > 500);
+   assert(ctx->early_input_release || ctx->compiler->gen >= 5);
 
    /* process each block: */
    do {
diff --git a/src/freedreno/ir3/ir3_lower_parallelcopy.c b/src/freedreno/ir3/ir3_lower_parallelcopy.c
index 81087d6..32b2d30 100644
--- a/src/freedreno/ir3/ir3_lower_parallelcopy.c
+++ b/src/freedreno/ir3/ir3_lower_parallelcopy.c
@@ -147,7 +147,7 @@ do_swap(struct ir3_compiler *compiler, struct ir3_instruction *instr,
    /* a5xx+ is known to support swz, which enables us to swap two registers
     * in-place. If unsupported we emulate it using the xor trick.
     */
-   if (compiler->gpu_id < 500) {
+   if (compiler->gen < 5) {
       /* Shared regs only exist since a5xx, so we don't have to provide a
        * fallback path for them.
        */
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index c0c9a7b..08ba6b8 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -149,7 +149,7 @@ static const nir_shader_compiler_options options_a6xx = {
 const nir_shader_compiler_options *
 ir3_get_compiler_options(struct ir3_compiler *compiler)
 {
-   if (compiler->gpu_id >= 600)
+   if (compiler->gen >= 6)
       return &options_a6xx;
    return &options;
 }
@@ -352,7 +352,7 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
       .lower_tg4_offsets = true,
    };
 
-   if (compiler->gpu_id >= 400) {
+   if (compiler->gen >= 4) {
       /* a4xx seems to have *no* sam.p */
       tex_options.lower_txp = ~0; /* lower all txp */
    } else {
@@ -376,7 +376,7 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
    OPT_V(s, nir_lower_tex, &tex_options);
    OPT_V(s, nir_lower_load_const_to_scalar);
 
-   if (compiler->gpu_id < 500)
+   if (compiler->gen < 5)
       OPT_V(s, ir3_nir_lower_tg4_to_tex);
 
    ir3_optimize_loop(compiler, s);
@@ -491,7 +491,7 @@ ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s)
       NIR_PASS_V(s, nir_lower_fb_read);
    }
 
-   if (compiler->gpu_id >= 600 && s->info.stage == MESA_SHADER_FRAGMENT &&
+   if (compiler->gen >= 6 && s->info.stage == MESA_SHADER_FRAGMENT &&
        !(ir3_shader_debug & IR3_DBG_NOFP16)) {
       NIR_PASS_V(s, nir_lower_mediump_io, nir_var_shader_out, 0, false);
    }
@@ -671,7 +671,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
    /* UBO offset lowering has to come after we've decided what will
    * be left as load_ubo
    */
-   if (so->shader->compiler->gpu_id >= 600)
+   if (so->shader->compiler->gen >= 6)
       progress |= OPT(s, nir_lower_ubo_vec4);
 
    OPT_V(s, ir3_nir_lower_io_offsets, so->shader->compiler->gpu_id);
@@ -820,7 +820,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
 
    ir3_nir_scan_driver_consts(nir, const_state);
 
-   if ((compiler->gpu_id < 500) && (v->shader->stream_output.num_outputs > 0)) {
+   if ((compiler->gen < 5) && (v->shader->stream_output.num_outputs > 0)) {
       const_state->num_driver_params =
          MAX2(const_state->num_driver_params, IR3_DP_VTXCNT_MAX + 1);
    }
@@ -853,13 +853,13 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
 
    if (const_state->num_driver_params > 0) {
       /* offset cannot be 0 for vs params loaded by CP_DRAW_INDIRECT_MULTI */
-      if (v->type == MESA_SHADER_VERTEX && compiler->gpu_id >= 600)
+      if (v->type == MESA_SHADER_VERTEX && compiler->gen >= 6)
         constoff = MAX2(constoff, 1);
      const_state->offsets.driver_param = constoff;
   }
   constoff += const_state->num_driver_params / 4;
 
-   if ((v->type == MESA_SHADER_VERTEX) && (compiler->gpu_id < 500) &&
+   if ((v->type == MESA_SHADER_VERTEX) && (compiler->gen < 5) &&
       v->shader->stream_output.num_outputs > 0) {
      const_state->offsets.tfbo = constoff;
      constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index 67ad1c7..2f4f33c 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -175,14 +175,14 @@ ir3_shader_assemble(struct ir3_shader_variant *v)
    * uploads are in units of 4 dwords. Round it up here to make calculations
    * regarding the shared constlen simpler.
    */
-   if (compiler->gpu_id >= 400)
+   if (compiler->gen >= 4)
       v->constlen = align(v->constlen, 4);
 
   /* Use the per-wave layout by default on a6xx for compute shaders. It
    * should result in better performance when loads/stores are to a uniform
    * index.
    */
-   v->pvtmem_per_wave = compiler->gpu_id >= 600 && !info->multi_dword_ldp_stp &&
+   v->pvtmem_per_wave = compiler->gen >= 6 && !info->multi_dword_ldp_stp &&
                        v->type == MESA_SHADER_COMPUTE;
 
    fixup_regfootprint(v);
@@ -332,7 +332,7 @@ alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
    v->nonbinning = nonbinning;
    v->key = *key;
    v->type = shader->type;
-   v->mergedregs = shader->compiler->gpu_id >= 600;
+   v->mergedregs = shader->compiler->gen >= 6;
 
    if (!v->binning_pass)
       v->const_state = rzalloc_size(v, sizeof(*v->const_state));
@@ -494,7 +494,7 @@ ir3_setup_used_key(struct ir3_shader *shader)
       * on older HW.
       */
      key->msaa = info->fs.uses_sample_qualifier ||
-                 (shader->compiler->gpu_id < 600 &&
+                 (shader->compiler->gen < 6 &&
                  (BITSET_TEST(info->system_values_read,
                               SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID) ||
                   BITSET_TEST(info->system_values_read,
@@ -568,7 +568,7 @@ ir3_trim_constlen(struct ir3_shader_variant **variants,
    * a6xx and the total limit. The frag limit on a6xx only matters for a
    * single stage, so it's always satisfied with the first variant.
    */
-   if (compiler->gpu_id >= 600) {
+   if (compiler->gen >= 6) {
      trimmed |= trim_constlens(constlens, MESA_SHADER_VERTEX,
                                MESA_SHADER_GEOMETRY, compiler->max_const_geom,
                                compiler->max_const_safe);
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 8f6f2cf..2633530 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -1034,7 +1034,7 @@ ir3_shader_branchstack_hw(const struct ir3_shader_variant *v)
    if (!v->shader)
       return 0;
 
-   if (v->shader->compiler->gpu_id < 500)
+   if (v->shader->compiler->gen < 5)
       return v->branchstack;
 
    if (v->branchstack > 0) {
diff --git a/src/freedreno/isa/encode.c b/src/freedreno/isa/encode.c
index 3dd0f6a..fb4e851 100644
--- a/src/freedreno/isa/encode.c
+++ b/src/freedreno/isa/encode.c
@@ -113,10 +113,10 @@ __instruction_case(struct encode_state *s, struct ir3_instruction *instr)
       }
    } else if (instr->opc == OPC_DEMOTE) {
       return OPC_KILL;
-   } else if ((instr->block->shader->compiler->gpu_id > 600) &&
+   } else if ((instr->block->shader->compiler->gen >= 6) &&
              is_atomic(instr->opc) && (instr->flags & IR3_INSTR_G)) {
      return instr->opc - OPC_ATOMIC_ADD + OPC_ATOMIC_B_ADD;
-   } else if (s->compiler->gpu_id >= 600) {
+   } else if (s->compiler->gen >= 6) {
      if (instr->opc == OPC_RESINFO) {
        return OPC_RESINFO_B;
      } else if (instr->opc == OPC_LDIB) {
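
For reference, the new compiler->gen field used throughout this patch is simply
the GPU generation derived from the numeric GPU id in ir3_compiler_create()
(compiler->gen = gpu_id / 100), so checks such as gpu_id >= 600 become gen >= 6.
A minimal standalone sketch of that mapping, assuming only that gpu_id encodes
the Adreno model number (e.g. 420 for a4xx, 530 for a5xx, 630 for a6xx):

   #include <stdint.h>
   #include <stdio.h>

   /* Mirrors the derivation added in ir3_compiler_create():
    *    compiler->gen = gpu_id / 100;
    */
   static uint8_t
   gen_from_gpu_id(uint32_t gpu_id)
   {
      return gpu_id / 100;
   }

   int
   main(void)
   {
      printf("a420 -> gen %u\n", gen_from_gpu_id(420));   /* prints 4 */
      printf("a530 -> gen %u\n", gen_from_gpu_id(530));   /* prints 5 */
      printf("a630 -> gen %u\n", gen_from_gpu_id(630));   /* prints 6 */
      return 0;
   }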