i965: Tighten up the check for flow control interfering with coalescing.
author Eric Anholt <eric@anholt.net>
Tue, 11 Jan 2011 23:13:49 +0000 (15:13 -0800)
committer Eric Anholt <eric@anholt.net>
Wed, 12 Jan 2011 00:04:25 +0000 (16:04 -0800)
This greatly improves codegen for programs with flow control by
allowing coalescing for all instructions at the top level, not just
ones that follow the last flow control in the program.

src/mesa/drivers/dri/i965/brw_fs.cpp

index 22e6e2e..35bce0f 100644 (file)
@@ -2929,10 +2929,35 @@ bool
 fs_visitor::register_coalesce()
 {
    bool progress = false;
+   int if_depth = 0;
+   int loop_depth = 0;
 
    foreach_iter(exec_list_iterator, iter, this->instructions) {
       fs_inst *inst = (fs_inst *)iter.get();
 
+      /* Make sure that we dominate the instructions we're going to
+       * scan for interfering with our coalescing, or we won't have
+       * scanned enough to see if anything interferes with our
+       * coalescing.  We don't dominate the following instructions if
+       * we're in a loop or an if block.
+       */
+      switch (inst->opcode) {
+      case BRW_OPCODE_DO:
+        loop_depth++;
+        break;
+      case BRW_OPCODE_WHILE:
+        loop_depth--;
+        break;
+      case BRW_OPCODE_IF:
+        if_depth++;
+        break;
+      case BRW_OPCODE_ENDIF:
+        if_depth--;
+        break;
+      }
+      if (loop_depth || if_depth)
+        continue;
+
       if (inst->opcode != BRW_OPCODE_MOV ||
          inst->predicated ||
          inst->saturate ||
@@ -2950,14 +2975,6 @@ fs_visitor::register_coalesce()
       for (; scan_iter.has_next(); scan_iter.next()) {
         fs_inst *scan_inst = (fs_inst *)scan_iter.get();
 
-        if (scan_inst->opcode == BRW_OPCODE_DO ||
-            scan_inst->opcode == BRW_OPCODE_WHILE ||
-            scan_inst->opcode == BRW_OPCODE_ENDIF) {
-           interfered = true;
-           iter = scan_iter;
-           break;
-        }
-
         if (scan_inst->dst.file == GRF) {
            if (scan_inst->dst.reg == inst->dst.reg &&
                (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
@@ -2977,10 +2994,6 @@ fs_visitor::register_coalesce()
         continue;
       }
 
-      /* Update live interval so we don't have to recalculate. */
-      this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg],
-                                                    virtual_grf_use[inst->dst.reg]);
-
       /* Rewrite the later usage to point at the source of the move to
        * be removed.
        */
@@ -3617,6 +3630,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
         v.calculate_live_intervals();
         progress = v.propagate_constants() || progress;
         progress = v.register_coalesce() || progress;
+        v.calculate_live_intervals();
         progress = v.compute_to_mrf() || progress;
         progress = v.dead_code_eliminate() || progress;
       } while (progress);