From a4ffc2a445055a81a655e64d57ee393a14a2eb16 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Kristian=20H=C3=B8gsberg?= <krh@bitplanet.net>
Date: Thu, 13 Nov 2014 16:28:19 -0800
Subject: [PATCH] i965: Move fs_visitor ra pass to new
 fs_visitor::allocate_registers()
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This will be reused for the scalar VS pass.

v2 (Ken): Rebase on master.

Signed-off-by: Kristian HÃ¸gsberg <krh@bitplanet.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 127 +++++++++++++++++++----------------
 src/mesa/drivers/dri/i965/brw_fs.h   |   1 +
 2 files changed, 69 insertions(+), 59 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d2e963f..ef6e6e7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3513,11 +3513,77 @@ fs_visitor::optimize()
    lower_uniform_pull_constant_loads();
 }
 
+void
+fs_visitor::allocate_registers()
+{
+   bool allocated_without_spills;
+
+   static enum instruction_scheduler_mode pre_modes[] = {
+      SCHEDULE_PRE,
+      SCHEDULE_PRE_NON_LIFO,
+      SCHEDULE_PRE_LIFO,
+   };
+
+   /* Try each scheduling heuristic to see if it can successfully register
+    * allocate without spilling.  They should be ordered by decreasing
+    * performance but increasing likelihood of allocating.
+    */
+   for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
+      schedule_instructions(pre_modes[i]);
+
+      if (0) {
+         assign_regs_trivial();
+         allocated_without_spills = true;
+      } else {
+         allocated_without_spills = assign_regs(false);
+      }
+      if (allocated_without_spills)
+         break;
+   }
+
+   if (!allocated_without_spills) {
+      /* We assume that any spilling is worse than just dropping back to
+       * SIMD8.  There's probably actually some intermediate point where
+       * SIMD16 with a couple of spills is still better.
+       */
+      if (dispatch_width == 16) {
+         fail("Failure to register allocate.  Reduce number of "
+              "live scalar values to avoid this.");
+      } else {
+         perf_debug("Fragment shader triggered register spilling.  "
+                    "Try reducing the number of live scalar values to "
+                    "improve performance.\n");
+      }
+
+      /* Since we're out of heuristics, just go spill registers until we
+       * get an allocation.
+       */
+      while (!assign_regs(true)) {
+         if (failed)
+            break;
+      }
+   }
+
+   /* This must come after all optimization and register allocation, since
+    * it inserts dead code that happens to have side effects, and it does
+    * so based on the actual physical registers in use.
+    */
+   insert_gen4_send_dependency_workarounds();
+
+   if (failed)
+      return;
+
+   if (!allocated_without_spills)
+      schedule_instructions(SCHEDULE_POST);
+
+   if (last_scratch > 0)
+      prog_data->total_scratch = brw_get_scratch_size(last_scratch);
+}
+
 bool
 fs_visitor::run()
 {
    sanity_param_count = prog->Parameters->NumParameters;
-   bool allocated_without_spills;
 
    assign_binding_table_offsets();
 
@@ -3530,7 +3596,6 @@ fs_visitor::run()
       emit_dummy_fs();
    } else if (brw->use_rep_send && dispatch_width == 16) {
       emit_repclear_shader();
-      allocated_without_spills = true;
    } else {
       if (INTEL_DEBUG & DEBUG_SHADER_TIME)
          emit_shader_time_begin();
@@ -3585,66 +3650,10 @@ fs_visitor::run()
       assign_curb_setup();
       assign_urb_setup();
 
-      static enum instruction_scheduler_mode pre_modes[] = {
-         SCHEDULE_PRE,
-         SCHEDULE_PRE_NON_LIFO,
-         SCHEDULE_PRE_LIFO,
-      };
-
-      /* Try each scheduling heuristic to see if it can successfully register
-       * allocate without spilling.  They should be ordered by decreasing
-       * performance but increasing likelihood of allocating.
-       */
-      for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
-         schedule_instructions(pre_modes[i]);
-
-         if (0) {
-            assign_regs_trivial();
-            allocated_without_spills = true;
-         } else {
-            allocated_without_spills = assign_regs(false);
-         }
-         if (allocated_without_spills)
-            break;
-      }
-
-      if (!allocated_without_spills) {
-         /* We assume that any spilling is worse than just dropping back to
-          * SIMD8.  There's probably actually some intermediate point where
-          * SIMD16 with a couple of spills is still better.
-          */
-         if (dispatch_width == 16) {
-            fail("Failure to register allocate.  Reduce number of "
-                 "live scalar values to avoid this.");
-         } else {
-            perf_debug("Fragment shader triggered register spilling.  "
-                       "Try reducing the number of live scalar values to "
-                       "improve performance.\n");
-         }
-
-         /* Since we're out of heuristics, just go spill registers until we
-          * get an allocation.
-          */
-         while (!assign_regs(true)) {
-            if (failed)
-               break;
-         }
-      }
-
-      /* This must come after all optimization and register allocation, since
-       * it inserts dead code that happens to have side effects, and it does
-       * so based on the actual physical registers in use.
-       */
-      insert_gen4_send_dependency_workarounds();
+      allocate_registers();
 
       if (failed)
          return false;
-
-      if (!allocated_without_spills)
-         schedule_instructions(SCHEDULE_POST);
-
-      if (last_scratch > 0)
-         prog_data->total_scratch = brw_get_scratch_size(last_scratch);
    }
 
    if (stage == MESA_SHADER_FRAGMENT) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 676087e..15eb76a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -404,6 +404,7 @@ public:
 
    bool run();
    void optimize();
+   void allocate_registers();
    void assign_binding_table_offsets();
    void setup_payload_gen4();
    void setup_payload_gen6();
-- 
2.7.4