broadcom/compiler: allow compilation strategies to limit minimum thread count
author Iago Toral Quiroga <itoral@igalia.com>
Tue, 6 Apr 2021 11:39:23 +0000 (13:39 +0200)
committer Marge Bot <eric+marge@anholt.net>
Fri, 9 Apr 2021 10:31:40 +0000 (10:31 +0000)
This adds a minimum thread count parameter to each compilation strategy with
the intention to limit the minimum allowed thread count that can be used to
register allocate with that strategy.

For now all strategies allow the minimum thread count supported by the
hardware, but we will be using this infrastructure to impose a more
strict limit in an upcoming optimization.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10100>

src/broadcom/compiler/nir_to_vir.c
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/vir.c

index fbfb574..3c570f1 100644 (file)
@@ -3919,11 +3919,11 @@ v3d_nir_to_vir(struct v3d_compile *c)
                         }
                 }
 
-                if (c->threads == min_threads) {
-                        if (c->fallback_scheduler) {
+                if (c->threads <= MAX2(c->min_threads_for_reg_alloc, min_threads)) {
+                        if (V3D_DEBUG & V3D_DEBUG_PERF) {
                                 fprintf(stderr,
-                                        "Failed to register allocate at %d "
-                                        "threads with any strategy.\n",
+                                        "Failed to register allocate %s at "
+                                        "%d threads.\n", vir_get_stage_name(c),
                                         c->threads);
                         }
                         c->compilation_result =
index 40447e5..6ea0e8b 100644 (file)
@@ -654,6 +654,14 @@ struct v3d_compile {
          */
         bool disable_ldunif_opt;
 
+        /* Minimum number of threads we are willing to use to register allocate
+         * a shader with the current compilation strategy. This only prevents
+         * us from lowering the thread count to register allocate successfully,
+         * which can be useful when we prefer doing other changes to the
+         * compilation strategy before dropping thread count.
+         */
+        uint32_t min_threads_for_reg_alloc;
+
         /* Last UBO index and offset used with a unifa/ldunifa sequence and the
          * block where it was emitted. This is used to skip unifa writes (and
          * their 3 delay slot) when the next UBO load reads right after the
index c5f5ca1..941dc5b 100644 (file)
@@ -525,6 +525,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
                                       void *debug_output_data),
                  void *debug_output_data,
                  int program_id, int variant_id,
+                 uint32_t min_threads_for_reg_alloc,
                  bool disable_tmu_pipelining,
                  bool fallback_scheduler)
 {
@@ -539,6 +540,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
         c->debug_output = debug_output;
         c->debug_output_data = debug_output_data;
         c->compilation_result = V3D_COMPILATION_SUCCEEDED;
+        c->min_threads_for_reg_alloc = min_threads_for_reg_alloc;
         c->fallback_scheduler = fallback_scheduler;
         c->disable_tmu_pipelining = disable_tmu_pipelining;
 
@@ -1264,16 +1266,34 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
 {
         struct v3d_compile *c;
 
-        static const char *strategies[] = {
-                "default",
-                "disable TMU pipelining",
-                "fallback scheduler"
+        /* This is a list of incremental changes to the compilation strategy
+         * that will be used to try to compile the shader successfully. The
+         * default strategy is to enable all optimizations which will have
+         * the highest register pressure but is expected to produce most
+         * optimal code. Following strategies incrementally disable specific
+         * optimizations that are known to contribute to register pressure
+         * in order to be able to compile the shader successfully while meeting
+         * thread count requirements.
+         *
+         * V3D 4.1+ has a min thread count of 2, but we can use 1 here to also
+         * cover previous hardware as well (meaning that we are not limiting
+         * register allocation to any particular thread count). This is fine
+         * because v3d_nir_to_vir will cap this to the actual minimum.
+         */
+        struct v3d_compiler_strategy {
+                const char *name;
+                uint32_t min_threads_for_reg_alloc;
+        } static const strategies[] = {
+                { "default",                  1 },
+                { "disable TMU pipelining",   1 },
+                { "fallback scheduler",       1 }
         };
 
         for (int i = 0; i < ARRAY_SIZE(strategies); i++) {
                 c = vir_compile_init(compiler, key, s,
                                      debug_output, debug_output_data,
                                      program_id, variant_id,
+                                     strategies[i].min_threads_for_reg_alloc,
                                      i > 0, /* Disable TMU pipelining */
                                      i > 1  /* Fallback_scheduler */);
 
@@ -1289,7 +1309,7 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                 char *debug_msg;
                 int ret = asprintf(&debug_msg,
                                    "Falling back to strategy '%s' for %s",
-                                   strategies[i + 1],
+                                   strategies[i + 1].name,
                                    vir_get_stage_name(c));
 
                 if (ret >= 0) {
@@ -1320,6 +1340,11 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                 }
         }
 
+        if (c->compilation_result != V3D_COMPILATION_SUCCEEDED) {
+                fprintf(stderr, "Failed to compile %s with any strategy.\n",
+                        vir_get_stage_name(c));
+        }
+
         struct v3d_prog_data *prog_data;
 
         prog_data = rzalloc_size(NULL, v3d_prog_data_size(c->s->info.stage));