From e6556b72fb54bed4e2d9dd2ddac255e27234f3e4 Mon Sep 17 00:00:00 2001 From: Hyunjun Ko Date: Wed, 18 May 2022 10:09:22 +0900 Subject: [PATCH] ir3: handle shared consts. Adds a shared consts base offset and a size of it(dwords) to ir3_compiler since they might be depending on gpu generations. (Danylo Piliaiev ) Adds a flag to present whether shared consts are enabled to ir3_shader_options and then it sets to ir3_const_state when creating an ir3 variant. Although this state is not per-shader state, this is necessary when figuring out real constlens. v1. Define a hw quirk for geometry shared const files and use it when calculating const length. v2. Don't hardcode when calculating a safe const length. Signed-off-by: Hyunjun Ko Part-of: --- src/freedreno/ir3/ir3.c | 20 ++++++++++++++++++++ src/freedreno/ir3/ir3_compiler.c | 4 ++++ src/freedreno/ir3/ir3_compiler.h | 15 +++++++++++++++ src/freedreno/ir3/ir3_shader.c | 27 ++++++++++++++++++++++++--- src/freedreno/ir3/ir3_shader.h | 28 +++++++++++++++++++++++----- 5 files changed, 86 insertions(+), 8 deletions(-) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 17820de..193b761 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -67,6 +67,22 @@ ir3_destroy(struct ir3 *shader) ralloc_free(shader); } +static bool +is_shared_consts(struct ir3_compiler *compiler, + struct ir3_const_state *const_state, + struct ir3_register *reg) +{ + if (const_state->shared_consts_enable && reg->flags & IR3_REG_CONST) { + uint32_t min_const_reg = regid(compiler->shared_consts_base_offset, 0); + uint32_t max_const_reg = + regid(compiler->shared_consts_base_offset + + compiler->shared_consts_size, 0); + return reg->num >= min_const_reg && min_const_reg < max_const_reg; + } + + return false; +} + static void collect_reg_info(struct ir3_instruction *instr, struct ir3_register *reg, struct ir3_info *info) @@ -79,6 +95,10 @@ collect_reg_info(struct ir3_instruction *instr, struct ir3_register *reg, return; } + 
/* Shared consts don't need to be included into constlen. */ + if (is_shared_consts(v->compiler, ir3_const_state(v), reg)) + return; + if (!(reg->flags & IR3_REG_R)) { repeat = 0; } diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 2623c01..38d6564 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -224,6 +224,10 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->has_dp2acc = dev_info->a6xx.has_dp2acc; compiler->has_dp4acc = dev_info->a6xx.has_dp4acc; + + compiler->shared_consts_base_offset = 504; + compiler->shared_consts_size = 8; + compiler->geom_shared_consts_size_quirk = 16; } else { compiler->max_const_pipeline = 512; compiler->max_const_geom = 512; diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 9b05348..545eb49 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -184,6 +184,21 @@ struct ir3_compiler { bool has_preamble; bool push_ubo_with_preamble; + + /* Where the shared consts start in constants file, in vec4's. */ + uint16_t shared_consts_base_offset; + + /* The size of shared consts for CS and FS(in vec4's). + * Also the size that is actually used on geometry stages (on a6xx). + */ + uint64_t shared_consts_size; + + /* Found on a6xx for geometry stages, that is different from + * actually used shared consts. + * + * TODO: Keep an eye on this for next gens. 
+ */ + uint64_t geom_shared_consts_size_quirk; }; struct ir3_compiler_options { diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index a997c32..bc02d0e 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -376,8 +376,10 @@ alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key, v->api_wavesize = shader->api_wavesize; v->real_wavesize = shader->real_wavesize; - if (!v->binning_pass) + if (!v->binning_pass) { v->const_state = rzalloc_size(v, sizeof(*v->const_state)); + v->const_state->shared_consts_enable = shader->shared_consts_enable; + } return v; } @@ -620,6 +622,22 @@ ir3_trim_constlen(struct ir3_shader_variant **variants, uint32_t trimmed = 0; STATIC_ASSERT(MESA_SHADER_STAGES <= 8 * sizeof(trimmed)); + bool shared_consts_enable = + ir3_const_state(variants[MESA_SHADER_VERTEX])->shared_consts_enable; + + /* Use a hw quirk for geometry shared consts, not matched with actual + * shared consts size (on a6xx). + */ + uint32_t shared_consts_size_geom = shared_consts_enable ? + compiler->geom_shared_consts_size_quirk : 0; + + uint32_t shared_consts_size = shared_consts_enable ? + compiler->shared_consts_size : 0; + + uint32_t safe_shared_consts_size = shared_consts_enable ? + ALIGN_POT(MAX2(DIV_ROUND_UP(shared_consts_size_geom, 4), + DIV_ROUND_UP(shared_consts_size, 5)), 4) : 0; + /* There are two shared limits to take into account, the geometry limit on * a6xx and the total limit. The frag limit on a6xx only matters for a * single stage, so it's always satisfied with the first variant. 
@@ -627,11 +645,13 @@ ir3_trim_constlen(struct ir3_shader_variant **variants, if (compiler->gen >= 6) { trimmed |= trim_constlens(constlens, MESA_SHADER_VERTEX, MESA_SHADER_GEOMETRY, - compiler->max_const_geom, compiler->max_const_safe); + compiler->max_const_geom - shared_consts_size_geom, + compiler->max_const_safe - safe_shared_consts_size); } trimmed |= trim_constlens(constlens, MESA_SHADER_VERTEX, MESA_SHADER_FRAGMENT, - compiler->max_const_pipeline, compiler->max_const_safe); + compiler->max_const_pipeline - shared_consts_size, + compiler->max_const_safe - safe_shared_consts_size); return trimmed; } @@ -653,6 +673,7 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir, shader->num_reserved_user_consts = options->reserved_user_consts; shader->api_wavesize = options->api_wavesize; shader->real_wavesize = options->real_wavesize; + shader->shared_consts_enable = options->shared_consts_enable; shader->nir = nir; ir3_disk_cache_init_shader_key(compiler, shader); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 7b058f5..1288e06 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -214,6 +214,7 @@ struct ir3_const_state { /* State of ubo access lowered to push consts: */ struct ir3_ubo_analysis_state ubo_state; + bool shared_consts_enable; }; /** @@ -876,6 +877,8 @@ struct ir3_shader { * recompiles for GL NOS that doesn't actually apply to the shader. */ struct ir3_shader_key key_mask; + + bool shared_consts_enable; }; /** @@ -893,21 +896,35 @@ ir3_const_state(const struct ir3_shader_variant *v) /* Given a variant, calculate the maximum constlen it can have. 
*/ - static inline unsigned ir3_max_const(const struct ir3_shader_variant *v) { const struct ir3_compiler *compiler = v->compiler; + bool shared_consts_enable = ir3_const_state(v)->shared_consts_enable; + + /* Shared consts size for CS and FS matches with what's actually used, + * but the size of shared consts for geometry stages doesn't. + * So we use a hw quirk for geometry shared consts. + */ + uint32_t shared_consts_size = shared_consts_enable ? + compiler->shared_consts_size : 0; + + uint32_t shared_consts_size_geom = shared_consts_enable ? + compiler->geom_shared_consts_size_quirk : 0; + + uint32_t safe_shared_consts_size = shared_consts_enable ? + ALIGN_POT(MAX2(DIV_ROUND_UP(shared_consts_size_geom, 4), + DIV_ROUND_UP(shared_consts_size, 5)), 4) : 0; if ((v->type == MESA_SHADER_COMPUTE) || (v->type == MESA_SHADER_KERNEL)) { - return compiler->max_const_compute; + return compiler->max_const_compute - shared_consts_size; } else if (v->key.safe_constlen) { - return compiler->max_const_safe; + return compiler->max_const_safe - safe_shared_consts_size; } else if (v->type == MESA_SHADER_FRAGMENT) { - return compiler->max_const_frag; + return compiler->max_const_frag - shared_consts_size; } else { - return compiler->max_const_geom; + return compiler->max_const_geom - shared_consts_size_geom; } } @@ -925,6 +942,7 @@ ir3_shader_get_variant(struct ir3_shader *shader, struct ir3_shader_options { unsigned reserved_user_consts; enum ir3_wavesize_option api_wavesize, real_wavesize; + bool shared_consts_enable; }; struct ir3_shader * -- 2.7.4