ir3: Enable nir_lower_vars_to_scratch on a6xx

author Connor Abbott <cwabbott0@gmail.com>

Fri, 30 Oct 2020 15:38:40 +0000 (16:38 +0100)

committer Connor Abbott <cwabbott0@gmail.com>

Thu, 19 Nov 2020 16:55:58 +0000 (17:55 +0100)
author Connor Abbott <cwabbott0@gmail.com>
Fri, 30 Oct 2020 15:38:40 +0000 (16:38 +0100)
committer Connor Abbott <cwabbott0@gmail.com>
Thu, 19 Nov 2020 16:55:58 +0000 (17:55 +0100)
diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c

index 9080ed7..c338af4 100644 (file)
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -97,6 +97,9 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id)
                 /* TODO: implement clip+cull distances on earlier gen's */
                 compiler->has_clip_cull = true;
  
+               /* TODO: implement private memory on earlier gens */
+               compiler->has_pvtmem = true;
+
                 if (compiler->gpu_id == 650)
                         compiler->tess_use_shared = true;
         } else {
diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h

index 65bf48f..9924140 100644 (file)
--- a/src/freedreno/ir3/ir3_compiler.h
+++ b/src/freedreno/ir3/ir3_compiler.h
@@ -109,6 +109,9 @@ struct ir3_compiler {
  
         /* Whether clip+cull distances are supported */
         bool has_clip_cull;
+
+       /* Whether private memory is supported */
+       bool has_pvtmem;
  };
  
  void ir3_compiler_destroy(struct ir3_compiler *compiler);
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c

index 2faa802..8a3f768 100644 (file)
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -513,6 +513,17 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
  
         progress |= OPT(s, ir3_nir_lower_ubo_loads, so);
  
+       /* Lower large temporaries to scratch, which in Qualcomm terms is private
+        * memory, to avoid excess register pressure. This should happen after
+        * nir_opt_large_constants, because loading from a UBO is much, much less
+        * expensive.
+        */
+       if (so->shader->compiler->has_pvtmem) {
+               NIR_PASS_V(s, nir_lower_vars_to_scratch, nir_var_function_temp,
+                                  16 * 16 /* bytes */, glsl_get_natural_size_align_bytes);
+       }
+
+
         OPT_V(s, nir_lower_amul, ir3_glsl_type_size);
  
         /* UBO offset lowering has to come after we've decided what will
author	Connor Abbott <cwabbott0@gmail.com>
	Fri, 30 Oct 2020 15:38:40 +0000 (16:38 +0100)
committer	Connor Abbott <cwabbott0@gmail.com>
	Thu, 19 Nov 2020 16:55:58 +0000 (17:55 +0100)
src/freedreno/ir3/ir3_compiler.c		patch \| blob \| history
src/freedreno/ir3/ir3_compiler.h		patch \| blob \| history
src/freedreno/ir3/ir3_nir.c		patch \| blob \| history