From: Connor Abbott Date: Fri, 30 Oct 2020 15:38:40 +0000 (+0100) Subject: ir3: Enable nir_lower_vars_to_scratch on a6xx X-Git-Tag: upstream/21.0.0~2341 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bac6cc586fe4c1b24351e0574d3a961eb631f6ae;p=platform%2Fupstream%2Fmesa.git ir3: Enable nir_lower_vars_to_scratch on a6xx Part-of: --- diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 9080ed7..c338af4 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -97,6 +97,9 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id) /* TODO: implement clip+cull distances on earlier gen's */ compiler->has_clip_cull = true; + /* TODO: implement private memory on earlier gen's */ + compiler->has_pvtmem = true; + if (compiler->gpu_id == 650) compiler->tess_use_shared = true; } else { diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 65bf48f..9924140 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -109,6 +109,9 @@ struct ir3_compiler { /* Whether clip+cull distances are supported */ bool has_clip_cull; + + /* Whether private memory is supported */ + bool has_pvtmem; }; void ir3_compiler_destroy(struct ir3_compiler *compiler); diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 2faa802..8a3f768 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -513,6 +513,17 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s) progress |= OPT(s, ir3_nir_lower_ubo_loads, so); + /* Lower large temporaries to scratch, which in Qualcomm terms is private + * memory, to avoid excess register pressure. This should happen after + * nir_opt_large_constants, because loading from a UBO is much, much less + * expensive. + */ + if (so->shader->compiler->has_pvtmem) { + NIR_PASS_V(s, nir_lower_vars_to_scratch, nir_var_function_temp, + 16 * 16 /* bytes */, glsl_get_natural_size_align_bytes); + } + + OPT_V(s, nir_lower_amul, ir3_glsl_type_size); /* UBO offset lowering has to come after we've decided what will