intel/fs/gen7+: Swap sample mask flag register and FIND_LIVE_CHANNEL temporary.
author Francisco Jerez <currojerez@riseup.net>
Sat, 4 Jan 2020 23:48:07 +0000 (15:48 -0800)
committer Francisco Jerez <currojerez@riseup.net>
Fri, 14 Feb 2020 22:31:48 +0000 (14:31 -0800)
FIND_LIVE_CHANNEL was using f1.0-f1.1 as its temporary flag register on
Gen7; use f0.0-f0.1 instead.  In order to avoid collision with the
discard sample mask, move the latter to f1.0-f1.1.  This makes room
for keeping track of the sample mask of the second half of SIMD32
programs that use discard.

Note that some MOVs of the sample mask into f1.0 become redundant now
in lower_surface_logical_send() and lower_a64_logical_send().

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs.h
src/intel/compiler/brw_fs_builder.h
src/intel/compiler/brw_fs_nir.cpp

index b583454..276eb70 100644 (file)
@@ -5458,15 +5458,17 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
           * vertical predication mode.
           */
          inst->predicate = BRW_PREDICATE_ALIGN1_ALLV;
-         ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2),
-                         sample_mask.type),
-                  sample_mask);
+         if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1)
+            ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2),
+                            sample_mask.type),
+                     sample_mask);
       } else {
          inst->flag_subreg = 2;
          inst->predicate = BRW_PREDICATE_NORMAL;
          inst->predicate_inverse = false;
-         ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
-                  sample_mask);
+         if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1)
+            ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
+                     sample_mask);
       }
    }
 
@@ -5646,8 +5648,9 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
 
       fs_reg sample_mask = sample_mask_reg(bld);
       const fs_builder ubld = bld.group(1, 0).exec_all();
-      ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
-               sample_mask);
+      if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1)
+         ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
+                  sample_mask);
    }
 
    fs_reg payload, payload2;
index 93d3e46..543e760 100644 (file)
@@ -419,13 +419,15 @@ private:
 
 /**
  * Return the flag register used in fragment shaders to keep track of live
- * samples.
+ * samples.  On Gen7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
+ * dispatch mode, while earlier generations are constrained to f0.1, which
+ * limits the dispatch width to SIMD16 for fragment shaders that use discard.
  */
 static inline unsigned
 sample_mask_flag_subreg(const fs_visitor *shader)
 {
    assert(shader->stage == MESA_SHADER_FRAGMENT);
-   return 1;
+   return shader->devinfo->gen >= 7 ? 2 : 1;
 }
 
 /**
index fac4f5c..896088c 100644 (file)
@@ -406,7 +406,7 @@ namespace brw {
          const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
          const dst_reg dst = vgrf(src.type);
 
-         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index)->flag_subreg = 2;
+         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
          ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));
 
          return src_reg(component(dst, 0));
index 93c6ee2..5d66ead 100644 (file)
@@ -3490,9 +3490,9 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
    case nir_intrinsic_discard:
    case nir_intrinsic_demote_if:
    case nir_intrinsic_discard_if: {
-      /* We track our discarded pixels in f0.1.  By predicating on it, we can
-       * update just the flag bits that aren't yet discarded.  If there's no
-       * condition, we emit a CMP of g0 != g0, so all currently executing
+      /* We track our discarded pixels in f0.1/f1.0.  By predicating on it, we
+       * can update just the flag bits that aren't yet discarded.  If there's
+       * no condition, we emit a CMP of g0 != g0, so all currently executing
        * channels will get turned off.
        */
       fs_inst *cmp = NULL;