i965/fs: Rewrite discards to use a flag subreg to track discarded pixels.
author Eric Anholt <eric@anholt.net>
Thu, 6 Dec 2012 20:15:13 +0000 (12:15 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 11 Dec 2012 18:13:08 +0000 (10:13 -0800)
This makes much more sense on gen6+, and will also prove useful for
early exit of shaders on discard.

v2: fix up a stale comment from before converting gen4-5.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_emit.cpp
src/mesa/drivers/dri/i965/brw_fs_fp.cpp
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index 5e00b40..2b77ae6 100644 (file)
@@ -673,7 +673,6 @@ enum opcode {
    FS_OPCODE_PIXEL_Y,
    FS_OPCODE_CINTERP,
    FS_OPCODE_LINTERP,
-   FS_OPCODE_DISCARD,
    FS_OPCODE_SPILL,
    FS_OPCODE_UNSPILL,
    FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
index dbf48f8..ac0bb56 100644 (file)
@@ -2434,6 +2434,14 @@ fs_visitor::run()
       else
         emit_interpolation_setup_gen6();
 
+      /* We handle discards by keeping track of the still-live pixels in f0.1.
+       * Initialize it with the dispatched pixels.
+       */
+      if (fp->UsesKill) {
+         fs_inst *discard_init = emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+         discard_init->flag_subreg = 1;
+      }
+
       /* Generate FS IR for main().  (the visitor only descends into
        * functions called "main").
        */
index b60a37e..b00755f 100644 (file)
@@ -501,7 +501,6 @@ private:
    void generate_math_gen4(fs_inst *inst,
                           struct brw_reg dst,
                           struct brw_reg src);
-   void generate_discard(fs_inst *inst);
    void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
    void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
                      bool negate_value);
index b3d7f81..f185eb5 100644 (file)
@@ -74,6 +74,17 @@ fs_generator::generate_fb_write(fs_inst *inst)
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
+   if (fp->UsesKill) {
+      struct brw_reg pixel_mask;
+
+      if (intel->gen >= 6)
+         pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+      else
+         pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+      brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
+   }
+
    if (inst->header_present) {
       if (intel->gen >= 6) {
         brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
@@ -514,58 +525,6 @@ fs_generator::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src
 }
 
 void
-fs_generator::generate_discard(fs_inst *inst)
-{
-   struct brw_reg f0 = brw_flag_reg(0, 0);
-
-   if (intel->gen >= 6) {
-      struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
-      struct brw_reg some_register;
-
-      /* As of gen6, we no longer have the mask register to look at,
-       * so life gets a bit more complicated.
-       */
-
-      /* Load the flag register with all ones. */
-      brw_push_insn_state(p);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
-      brw_MOV(p, f0, brw_imm_uw(0xffff));
-      brw_pop_insn_state(p);
-
-      /* Do a comparison that should always fail, to produce 0s in the flag
-       * reg where we have active channels.
-       */
-      some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
-      brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
-             BRW_CONDITIONAL_NZ, some_register, some_register);
-
-      /* Undo CMP's whacking of predication*/
-      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-
-      brw_push_insn_state(p);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
-      brw_AND(p, g1, f0, g1);
-      brw_pop_insn_state(p);
-   } else {
-      struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-
-      brw_push_insn_state(p);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
-      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-
-      /* Unlike the 965, we have the mask reg, so we just need
-       * somewhere to invert that (containing channels to be disabled)
-       * so it can be ANDed with the mask of pixels still to be
-       * written. Use the flag reg for consistency with gen6+.
-       */
-      brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
-      brw_AND(p, g0, f0, g0);
-
-      brw_pop_insn_state(p);
-   }
-}
-
-void
 fs_generator::generate_spill(fs_inst *inst, struct brw_reg src)
 {
    assert(inst->mlen != 0);
@@ -745,12 +704,16 @@ void
 fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst)
 {
    struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg);
-   struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+   struct brw_reg dispatch_mask;
+
+   if (intel->gen >= 6)
+      dispatch_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+   else
+      dispatch_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 
-   assert (intel->gen >= 6);
    brw_push_insn_state(p);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, flags, g1);
+   brw_MOV(p, flags, dispatch_mask);
    brw_pop_insn_state(p);
 }
 
@@ -1083,9 +1046,6 @@ fs_generator::generate_code(exec_list *instructions)
       case SHADER_OPCODE_TXS:
         generate_tex(inst, dst, src[0]);
         break;
-      case FS_OPCODE_DISCARD:
-        generate_discard(inst);
-        break;
       case FS_OPCODE_DDX:
         generate_ddx(inst, dst, src[0]);
         break;
index 4be7779..bebf059 100644 (file)
@@ -252,14 +252,15 @@ fs_visitor::emit_fragment_program_code()
                continue;
             }
 
-            emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
-                     BRW_CONDITIONAL_L));
-
-            if (intel->gen < 6 && dispatch_width == 16)
-               fail("Can't support (non-uniform) control flow on 16-wide");
-            emit(IF(BRW_PREDICATE_NORMAL));
-            emit(FS_OPCODE_DISCARD);
-            emit(BRW_OPCODE_ENDIF);
+
+            /* Emit an instruction that's predicated on the current
+             * undiscarded pixels, and updates just those pixels to be
+             * turned off.
+             */
+            fs_inst *cmp = emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
+                                    BRW_CONDITIONAL_GE));
+            cmp->predicate = BRW_PREDICATE_NORMAL;
+            cmp->flag_subreg = 1;
          }
          break;
       }
index a4fc032..d1147f5 100644 (file)
@@ -277,9 +277,6 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
          payload_last_use_ip[0 / reg_width] = use_ip;
          payload_last_use_ip[1 / reg_width] = use_ip;
          break;
-      case FS_OPCODE_DISCARD:
-         payload_last_use_ip[1 / reg_width] = use_ip;
-         break;
 
       case FS_OPCODE_LINTERP:
          /* On gen6+ in 16-wide, there are 4 adjacent registers (so 2 nodes)
index 96d1131..e9c25b0 100644 (file)
@@ -299,7 +299,6 @@ instruction_scheduler::calculate_deps()
       }
 
       if (inst->predicate) {
-        assert(last_conditional_mod[inst->flag_subreg]);
         add_dep(last_conditional_mod[inst->flag_subreg], n);
       }
 
index f1c6860..98cd064 100644 (file)
@@ -1435,7 +1435,17 @@ fs_visitor::visit(ir_discard *ir)
 {
    assert(ir->condition == NULL); /* FINISHME */
 
-   emit(FS_OPCODE_DISCARD);
+   /* We track our discarded pixels in f0.1.  By predicating on it, we can
+    * update just the flag bits that aren't yet discarded.  By emitting a
+    * CMP of g0 != g0, all our currently executing channels will get turned
+    * off.
+    */
+   fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
+                                   BRW_REGISTER_TYPE_UW));
+   fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
+                           BRW_CONDITIONAL_NZ));
+   cmp->predicate = BRW_PREDICATE_NORMAL;
+   cmp->flag_subreg = 1;
 }
 
 void