intel/vec4: Remove all support for Gen8+ [v2]

author Ian Romanick <ian.d.romanick@intel.com>

Tue, 22 Sep 2020 20:09:56 +0000 (13:09 -0700)

committer Ian Romanick <ian.d.romanick@intel.com>

Mon, 28 Sep 2020 18:43:10 +0000 (11:43 -0700)
author Ian Romanick <ian.d.romanick@intel.com>
Tue, 22 Sep 2020 20:09:56 +0000 (13:09 -0700)
committer Ian Romanick <ian.d.romanick@intel.com>
Mon, 28 Sep 2020 18:43:10 +0000 (11:43 -0700)
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp

index ee91be0..878ee4e 100644 (file)
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -1009,23 +1009,7 @@ vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst)
  
  #define IS_64BIT(reg) (reg.file != BAD_FILE && type_sz(reg.type) == 8)
  
-   /* From the Cherryview and Broadwell PRMs:
-    *
-    * "When source or destination datatype is 64b or operation is integer DWord
-    * multiply, DepCtrl must not be used."
-    *
-    * SKL PRMs don't include this restriction, however, gen7 seems to be
-    * affected, at least by the 64b restriction, since DepCtrl with double
-    * precision instructions seems to produce GPU hangs in some cases.
-    */
-   if (devinfo->gen == 8 || gen_device_info_is_9lp(devinfo)) {
-      if (inst->opcode == BRW_OPCODE_MUL &&
-         IS_DWORD(inst->src[0]) &&
-         IS_DWORD(inst->src[1]))
-         return true;
-   }
-
-   if (devinfo->gen >= 7 && devinfo->gen <= 8) {
+   if (devinfo->gen >= 7) {
        if (IS_64BIT(inst->dst) || IS_64BIT(inst->src[0]) ||
            IS_64BIT(inst->src[1]) || IS_64BIT(inst->src[2]))
        return true;
@@ -1034,11 +1018,6 @@ vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst)
  #undef IS_64BIT
  #undef IS_DWORD
  
-   if (devinfo->gen >= 8) {
-      if (inst->opcode == BRW_OPCODE_F32TO16)
-         return true;
-   }
-
     /*
      * mlen:
      * In the presence of send messages, totally interrupt dependency
@@ -1912,7 +1891,7 @@ vec4_visitor::lower_minmax()
  src_reg
  vec4_visitor::get_timestamp()
  {
-   assert(devinfo->gen >= 7);
+   assert(devinfo->gen == 7);
  
     src_reg ts = src_reg(brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                  BRW_ARF_TIMESTAMP,
diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h

index 73c18b6..a6015b8 100644 (file)
--- a/src/intel/compiler/brw_vec4.h
+++ b/src/intel/compiler/brw_vec4.h
@@ -241,7 +241,6 @@ public:
     void fix_float_operands(src_reg op[3], nir_alu_instr *instr);
  
     src_reg fix_3src_operand(const src_reg &src);
-   src_reg resolve_source_modifiers(const src_reg &src);
  
     vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                                 const src_reg &src1 = src_reg());
diff --git a/src/intel/compiler/brw_vec4_copy_propagation.cpp b/src/intel/compiler/brw_vec4_copy_propagation.cpp

index 8fb5d1c..9e4637e 100644 (file)
--- a/src/intel/compiler/brw_vec4_copy_propagation.cpp
+++ b/src/intel/compiler/brw_vec4_copy_propagation.cpp
@@ -78,15 +78,6 @@ is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
             inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch)));
  }
  
-static bool
-is_logic_op(enum opcode opcode)
-{
-   return (opcode == BRW_OPCODE_AND ||
-           opcode == BRW_OPCODE_OR  ||
-           opcode == BRW_OPCODE_XOR ||
-           opcode == BRW_OPCODE_NOT);
-}
-
  /**
   * Get the origin of a copy as a single register if all components present in
   * the given readmask originate from the same register and have compatible
@@ -132,8 +123,7 @@ get_copy_value(const copy_entry &entry, unsigned readmask)
  }
  
  static bool
-try_constant_propagate(const struct gen_device_info *devinfo,
-                       vec4_instruction *inst,
+try_constant_propagate(vec4_instruction *inst,
                         int arg, const copy_entry *entry)
  {
     /* For constant propagation, we only handle the same constant
@@ -169,17 +159,13 @@ try_constant_propagate(const struct gen_device_info *devinfo,
     }
  
     if (inst->src[arg].abs) {
-      if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
-          !brw_abs_immediate(value.type, &value.as_brw_reg())) {
+      if (!brw_abs_immediate(value.type, &value.as_brw_reg()))
           return false;
-      }
     }
  
     if (inst->src[arg].negate) {
-      if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
-          !brw_negate_immediate(value.type, &value.as_brw_reg())) {
+      if (!brw_negate_immediate(value.type, &value.as_brw_reg()))
           return false;
-      }
     }
  
     value = swizzle(value, inst->src[arg].swizzle);
@@ -200,9 +186,7 @@ try_constant_propagate(const struct gen_device_info *devinfo,
     case SHADER_OPCODE_POW:
     case SHADER_OPCODE_INT_QUOTIENT:
     case SHADER_OPCODE_INT_REMAINDER:
-      if (devinfo->gen < 8)
           break;
-      /* fallthrough */
     case BRW_OPCODE_DP2:
     case BRW_OPCODE_DP3:
     case BRW_OPCODE_DP4:
@@ -333,11 +317,10 @@ try_copy_propagate(const struct gen_device_info *devinfo,
         value.file != ATTR)
        return false;
  
-   /* In gen < 8 instructions that write 2 registers also need to read 2
-    * registers. Make sure we don't break that restriction by copy
-    * propagating from a uniform.
+   /* Instructions that write 2 registers also need to read 2 registers. Make
+    * sure we don't break that restriction by copy propagating from a uniform.
      */
-   if (devinfo->gen < 8 && inst->size_written > REG_SIZE && is_uniform(value))
+   if (inst->size_written > REG_SIZE && is_uniform(value))
        return false;
  
     /* There is a regioning restriction such that if execsize == width
@@ -358,11 +341,6 @@ try_copy_propagate(const struct gen_device_info *devinfo,
     if (type_sz(value.type) != type_sz(inst->src[arg].type))
        return false;
  
-   if (devinfo->gen >= 8 && (value.negate || value.abs) &&
-       is_logic_op(inst->opcode)) {
-      return false;
-   }
-
     if (inst->src[arg].offset % REG_SIZE || value.offset % REG_SIZE)
        return false;
  
@@ -516,7 +494,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
                                 inst->src[i].offset / REG_SIZE);
           const copy_entry &entry = entries[reg];
  
-         if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
+         if (do_constant_prop && try_constant_propagate(inst, i, &entry))
              progress = true;
           else if (try_copy_propagate(devinfo, inst, i, &entry, attributes_per_reg))
             progress = true;
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp

index d1aa40e..3afaaed 100644 (file)
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -130,7 +130,7 @@ generate_tex(struct brw_codegen *p,
        case SHADER_OPCODE_TXD:
           if (inst->shadow_compare) {
              /* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
-            assert(devinfo->gen >= 8 || devinfo->is_haswell);
+            assert(devinfo->is_haswell);
              msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
           } else {
              msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
@@ -139,10 +139,6 @@ generate_tex(struct brw_codegen *p,
        case SHADER_OPCODE_TXF:
          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
          break;
-      case SHADER_OPCODE_TXF_CMS_W:
-         assert(devinfo->gen >= 9);
-         msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
-         break;
        case SHADER_OPCODE_TXF_CMS:
           if (devinfo->gen >= 7)
              msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
@@ -234,12 +230,6 @@ generate_tex(struct brw_codegen *p,
              /* Set the texel offset bits in DWord 2. */
              dw2 = inst->offset;
  
-         if (devinfo->gen >= 9)
-            /* SKL+ overloads BRW_SAMPLER_SIMD_MODE_SIMD4X2 to also do SIMD8D,
-             * based on bit 22 in the header.
-             */
-            dw2 |= GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2;
-
           /* The VS, DS, and FS stages have the g0.2 payload delivered as 0,
            * so header0.2 is 0 when g0 is copied.  The HS and GS stages do
            * not, so we must set it to 0 to avoid setting undesirable bits
@@ -472,29 +462,24 @@ generate_gs_set_vertex_count(struct brw_codegen *p,
     brw_push_insn_state(p);
     brw_set_default_mask_control(p, BRW_MASK_DISABLE);
  
-   if (p->devinfo->gen >= 8) {
-      /* Move the vertex count into the second MRF for the EOT write. */
-      brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD),
-              src);
-   } else {
-      /* If we think of the src and dst registers as composed of 8 DWORDs each,
-       * we want to pick up the contents of DWORDs 0 and 4 from src, truncate
-       * them to WORDs, and then pack them into DWORD 2 of dst.
-       *
-       * It's easier to get the EU to do this if we think of the src and dst
-       * registers as composed of 16 WORDS each; then, we want to pick up the
-       * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5
-       * of dst.
-       *
-       * We can do that by the following EU instruction:
-       *
-       *     mov (2) dst.4<1>:uw src<8;1,0>:uw   { Align1, Q1, NoMask }
-       */
-      brw_set_default_access_mode(p, BRW_ALIGN_1);
-      brw_MOV(p,
-              suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
-              stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
-   }
+   /* If we think of the src and dst registers as composed of 8 DWORDs each,
+    * we want to pick up the contents of DWORDs 0 and 4 from src, truncate
+    * them to WORDs, and then pack them into DWORD 2 of dst.
+    *
+    * It's easier to get the EU to do this if we think of the src and dst
+    * registers as composed of 16 WORDS each; then, we want to pick up the
+    * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5
+    * of dst.
+    *
+    * We can do that by the following EU instruction:
+    *
+    *     mov (2) dst.4<1>:uw src<8;1,0>:uw   { Align1, Q1, NoMask }
+    */
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_MOV(p,
+           suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
+           stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
+
     brw_pop_insn_state(p);
  }
  
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp

index 615b296..9ee2765 100644 (file)
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -214,35 +214,17 @@ vec4_gs_visitor::emit_thread_end()
      */
     int base_mrf = 1;
  
-   bool static_vertex_count = gs_prog_data->static_vertex_count != -1;
-
-   /* If the previous instruction was a URB write, we don't need to issue
-    * a second one - we can just set the EOT bit on the previous write.
-    *
-    * Skip this on Gen8+ unless there's a static vertex count, as we also
-    * need to write the vertex count out, and combining the two may not be
-    * possible (or at least not straightforward).
-    */
-   vec4_instruction *last = (vec4_instruction *) instructions.get_tail();
-   if (last && last->opcode == GS_OPCODE_URB_WRITE &&
-       !(INTEL_DEBUG & DEBUG_SHADER_TIME) &&
-       devinfo->gen >= 8 && static_vertex_count) {
-      last->urb_write_flags = BRW_URB_WRITE_EOT | last->urb_write_flags;
-      return;
-   }
-
     current_annotation = "thread end";
     dst_reg mrf_reg(MRF, base_mrf);
     src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
     vec4_instruction *inst = emit(MOV(mrf_reg, r0));
     inst->force_writemask_all = true;
-   if (devinfo->gen < 8 || !static_vertex_count)
-      emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
+   emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
     if (INTEL_DEBUG & DEBUG_SHADER_TIME)
        emit_shader_time_end();
     inst = emit(GS_OPCODE_THREAD_END);
     inst->base_mrf = base_mrf;
-   inst->mlen = devinfo->gen >= 8 && !static_vertex_count ? 2 : 1;
+   inst->mlen = 1;
  }
  
  
@@ -279,12 +261,6 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
     vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE);
     inst->offset = gs_prog_data->control_data_header_size_hwords;
  
-   /* We need to increment Global Offset by 1 to make room for Broadwell's
-    * extra "Vertex Count" payload at the beginning of the URB entry.
-    */
-   if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
-      inst->offset++;
-
     inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
     return inst;
  }
@@ -398,13 +374,6 @@ vec4_gs_visitor::emit_control_data_bits()
     inst->force_writemask_all = true;
     inst = emit(GS_OPCODE_URB_WRITE);
     inst->urb_write_flags = urb_write_flags;
-   /* We need to increment Global Offset by 256-bits to make room for
-    * Broadwell's extra "Vertex Count" payload at the beginning of the
-    * URB entry.  Since this is an OWord message, Global Offset is counted
-    * in 128-bit units, so we must set it to 2.
-    */
-   if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
-      inst->offset = 2;
     inst->base_mrf = base_mrf;
     inst->mlen = 2;
  }
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp

index 3825a84..a2b691b 100644 (file)
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -283,10 +283,7 @@ static src_reg
  setup_imm_df(const vec4_builder &bld, double v)
  {
     const gen_device_info *devinfo = bld.shader->devinfo;
-   assert(devinfo->gen >= 7);
-
-   if (devinfo->gen >= 8)
-      return brw_imm_df(v);
+   assert(devinfo->gen == 7);
  
     /* gen7.5 does not support DF immediates straightforwardly, but the DIM
      * instruction allows setting the 64-bit immediate value.
@@ -463,7 +460,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
     }
  
     case nir_intrinsic_store_ssbo: {
-      assert(devinfo->gen >= 7);
+      assert(devinfo->gen == 7);
  
        /* brw_nir_lower_mem_access_bit_sizes takes care of this */
        assert(nir_src_bit_size(instr->src[0]) == 32);
@@ -525,7 +522,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
     }
  
     case nir_intrinsic_load_ssbo: {
-      assert(devinfo->gen >= 7);
+      assert(devinfo->gen == 7);
  
        /* brw_nir_lower_mem_access_bit_sizes takes care of this */
        assert(nir_dest_bit_size(instr->dest) == 32);
@@ -867,16 +864,6 @@ emit_find_msb_using_lzd(const vec4_builder &bld,
  void
  vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src)
  {
-   /* BDW PRM vol 15 - workarounds:
-    * DF->f format conversion for Align16 has wrong emask calculation when
-    * source is immediate.
-    */
-   if (devinfo->gen == 8 && dst.type == BRW_REGISTER_TYPE_F &&
-       src.file == BRW_IMMEDIATE_VALUE) {
-      emit(MOV(dst, brw_imm_f(src.df)));
-      return;
-   }
-
     enum opcode op;
     switch (dst.type) {
     case BRW_REGISTER_TYPE_D:
@@ -932,8 +919,7 @@ vec4_visitor::emit_conversion_to_double(dst_reg dst, src_reg src)
   */
  static int
  try_immediate_source(const nir_alu_instr *instr, src_reg *op,
-                     bool try_src0_also,
-                     ASSERTED const gen_device_info *devinfo)
+                     bool try_src0_also)
  {
     unsigned idx;
  
@@ -982,16 +968,8 @@ try_immediate_source(const nir_alu_instr *instr, src_reg *op,
        if (op[idx].abs)
           d = MAX2(-d, d);
  
-      if (op[idx].negate) {
-         /* On Gen8+ a negation source modifier on a logical operation means
-          * something different.  Nothing should generate this, so assert that
-          * it does not occur.
-          */
-         assert(devinfo->gen < 8 || (instr->op != nir_op_iand &&
-                                     instr->op != nir_op_ior &&
-                                     instr->op != nir_op_ixor));
+      if (op[idx].negate)
           d = -d;
-      }
  
        op[idx] = retype(src_reg(brw_imm_d(d)), old_type);
        break;
@@ -1146,7 +1124,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
  
     switch (instr->op) {
     case nir_op_mov:
-      try_immediate_source(instr, &op[0], true, devinfo);
+      try_immediate_source(instr, &op[0], true);
        inst = emit(MOV(dst, op[0]));
        break;
  
@@ -1197,7 +1175,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
        /* fall through */
     case nir_op_fadd:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
        inst = emit(ADD(dst, op[0], op[1]));
        break;
  
@@ -1208,42 +1186,39 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
        break;
  
     case nir_op_fmul:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
        inst = emit(MUL(dst, op[0], op[1]));
        break;
  
     case nir_op_imul: {
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      if (devinfo->gen < 8) {
-         /* For integer multiplication, the MUL uses the low 16 bits of one of
-          * the operands (src0 through SNB, src1 on IVB and later). The MACH
-          * accumulates in the contribution of the upper 16 bits of that
-          * operand. If we can determine that one of the args is in the low
-          * 16 bits, though, we can just emit a single MUL.
-          */
-         if (nir_src_is_const(instr->src[0].src) &&
-             nir_alu_instr_src_read_mask(instr, 0) == 1 &&
-             const_src_fits_in_16_bits(instr->src[0].src, op[0].type)) {
-            if (devinfo->gen < 7)
-               emit(MUL(dst, op[0], op[1]));
-            else
-               emit(MUL(dst, op[1], op[0]));
-         } else if (nir_src_is_const(instr->src[1].src) &&
-                    nir_alu_instr_src_read_mask(instr, 1) == 1 &&
-                    const_src_fits_in_16_bits(instr->src[1].src, op[1].type)) {
-            if (devinfo->gen < 7)
-               emit(MUL(dst, op[1], op[0]));
-            else
-               emit(MUL(dst, op[0], op[1]));
-         } else {
-            struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
-
-            emit(MUL(acc, op[0], op[1]));
-            emit(MACH(dst_null_d(), op[0], op[1]));
-            emit(MOV(dst, src_reg(acc)));
-         }
+
+      /* For integer multiplication, the MUL uses the low 16 bits of one of
+       * the operands (src0 through SNB, src1 on IVB and later). The MACH
+       * accumulates in the contribution of the upper 16 bits of that
+       * operand. If we can determine that one of the args is in the low
+       * 16 bits, though, we can just emit a single MUL.
+       */
+      if (nir_src_is_const(instr->src[0].src) &&
+          nir_alu_instr_src_read_mask(instr, 0) == 1 &&
+          const_src_fits_in_16_bits(instr->src[0].src, op[0].type)) {
+         if (devinfo->gen < 7)
+            emit(MUL(dst, op[0], op[1]));
+         else
+            emit(MUL(dst, op[1], op[0]));
+      } else if (nir_src_is_const(instr->src[1].src) &&
+                 nir_alu_instr_src_read_mask(instr, 1) == 1 &&
+                 const_src_fits_in_16_bits(instr->src[1].src, op[1].type)) {
+         if (devinfo->gen < 7)
+            emit(MUL(dst, op[1], op[0]));
+         else
+            emit(MUL(dst, op[0], op[1]));
        } else {
-        emit(MUL(dst, op[0], op[1]));
+         struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
+
+         emit(MUL(acc, op[0], op[1]));
+         emit(MACH(dst_null_d(), op[0], op[1]));
+         emit(MOV(dst, src_reg(acc)));
        }
        break;
     }
@@ -1253,11 +1228,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
        struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
  
-      if (devinfo->gen >= 8)
-         emit(MUL(acc, op[0], retype(op[1], BRW_REGISTER_TYPE_UW)));
-      else
-         emit(MUL(acc, op[0], op[1]));
-
+      emit(MUL(acc, op[0], op[1]));
        emit(MACH(dst, op[0], op[1]));
        break;
     }
@@ -1433,7 +1404,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
        /* fall through */
     case nir_op_fmin:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
        inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]);
        break;
  
@@ -1442,7 +1413,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
        /* fall through */
     case nir_op_fmax:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
        inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]);
        break;
  
@@ -1473,7 +1444,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
           /* If the order of the sources is changed due to an immediate value,
            * then the condition must also be changed.
            */
-         if (try_immediate_source(instr, op, true, devinfo) == 0)
+         if (try_immediate_source(instr, op, true) == 0)
              conditional_mod = brw_swap_cmod(conditional_mod);
  
           emit(CMP(dst, op[0], op[1], conditional_mod));
@@ -1533,39 +1504,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
  
     case nir_op_inot:
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      if (devinfo->gen >= 8) {
-         op[0] = resolve_source_modifiers(op[0]);
-      }
        emit(NOT(dst, op[0]));
        break;
  
     case nir_op_ixor:
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      if (devinfo->gen >= 8) {
-         op[0] = resolve_source_modifiers(op[0]);
-         op[1] = resolve_source_modifiers(op[1]);
-      }
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
        emit(XOR(dst, op[0], op[1]));
        break;
  
     case nir_op_ior:
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      if (devinfo->gen >= 8) {
-         op[0] = resolve_source_modifiers(op[0]);
-         op[1] = resolve_source_modifiers(op[1]);
-      }
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
        emit(OR(dst, op[0], op[1]));
        break;
  
     case nir_op_iand:
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      if (devinfo->gen >= 8) {
-         op[0] = resolve_source_modifiers(op[0]);
-         op[1] = resolve_source_modifiers(op[1]);
-      }
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
        emit(AND(dst, op[0], op[1]));
        break;
  
@@ -1843,19 +1799,19 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
  
     case nir_op_ishl:
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      try_immediate_source(instr, op, false, devinfo);
+      try_immediate_source(instr, op, false);
        emit(SHL(dst, op[0], op[1]));
        break;
  
     case nir_op_ishr:
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      try_immediate_source(instr, op, false, devinfo);
+      try_immediate_source(instr, op, false);
        emit(ASR(dst, op[0], op[1]));
        break;
  
     case nir_op_ushr:
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      try_immediate_source(instr, op, false, devinfo);
+      try_immediate_source(instr, op, false);
        emit(SHR(dst, op[0], op[1]));
        break;
  
@@ -1902,22 +1858,22 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
        break;
  
     case nir_op_fdot_replicated2:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
        inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
        break;
  
     case nir_op_fdot_replicated3:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
        inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
        break;
  
     case nir_op_fdot_replicated4:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
        inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
        break;
  
     case nir_op_fdph_replicated:
-      try_immediate_source(instr, op, false, devinfo);
+      try_immediate_source(instr, op, false);
        inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]);
        break;
  
diff --git a/src/intel/compiler/brw_vec4_surface_builder.cpp b/src/intel/compiler/brw_vec4_surface_builder.cpp

index 7120558..5418f60 100644 (file)
--- a/src/intel/compiler/brw_vec4_surface_builder.cpp
+++ b/src/intel/compiler/brw_vec4_surface_builder.cpp
@@ -163,8 +163,7 @@ namespace brw {
                           unsigned dims, unsigned size,
                           brw_predicate pred)
        {
-         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
-                                   bld.shader->devinfo->is_haswell);
+         const bool has_simd4x2 = bld.shader->devinfo->is_haswell;
           emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
                     emit_insert(bld, addr, dims, has_simd4x2),
                     has_simd4x2 ? 1 : dims,
@@ -185,8 +184,7 @@ namespace brw {
                            unsigned dims, unsigned rsize, unsigned op,
                            brw_predicate pred)
        {
-         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
-                                   bld.shader->devinfo->is_haswell);
+         const bool has_simd4x2 = bld.shader->devinfo->is_haswell;
  
           /* Zip the components of both sources, they are represented as the X
            * and Y components of the same vector.
diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp

index f18fd9e..094ec54 100644 (file)
--- a/src/intel/compiler/brw_vec4_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_visitor.cpp
@@ -306,22 +306,9 @@ vec4_visitor::fix_3src_operand(const src_reg &src)
  }
  
  src_reg
-vec4_visitor::resolve_source_modifiers(const src_reg &src)
-{
-   if (!src.abs && !src.negate)
-      return src;
-
-   dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
-   resolved.type = src.type;
-   emit(MOV(resolved, src));
-
-   return src_reg(resolved);
-}
-
-src_reg
  vec4_visitor::fix_math_operand(const src_reg &src)
  {
-   if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE)
+   if (devinfo->gen < 6 || src.file == BAD_FILE)
        return src;
  
     /* The gen6 math instruction ignores the source modifiers --
@@ -753,35 +740,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
  
     vec4_instruction *pull;
  
-   if (devinfo->gen >= 9) {
-      /* Gen9+ needs a message header in order to use SIMD4x2 mode */
-      src_reg header(this, glsl_type::uvec4_type, 2);
-
-      pull = new(mem_ctx)
-         vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
-                          dst_reg(header));
-
-      if (before_inst)
-         emit_before(before_block, before_inst, pull);
-      else
-         emit(pull);
-
-      dst_reg index_reg = retype(byte_offset(dst_reg(header), REG_SIZE),
-                                 offset_reg.type);
-      pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
-
-      if (before_inst)
-         emit_before(before_block, before_inst, pull);
-      else
-         emit(pull);
-
-      pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
-                                           dst,
-                                           surf_index,
-                                           header);
-      pull->mlen = 2;
-      pull->header_size = 1;
-   } else if (devinfo->gen >= 7) {
+   if (devinfo->gen >= 7) {
        dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
  
        grf_offset.type = offset_reg.type;
@@ -838,24 +797,9 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
     inst->base_mrf = 2;
     inst->src[1] = surface;
     inst->src[2] = brw_imm_ud(0); /* sampler */
+   inst->mlen = 1;
  
-   int param_base;
-
-   if (devinfo->gen >= 9) {
-      /* Gen9+ needs a message header in order to use SIMD4x2 mode */
-      vec4_instruction *header_inst = new(mem_ctx)
-         vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
-                          dst_reg(MRF, inst->base_mrf));
-
-      emit(header_inst);
-
-      inst->mlen = 2;
-      inst->header_size = 1;
-      param_base = inst->base_mrf + 1;
-   } else {
-      inst->mlen = 1;
-      param_base = inst->base_mrf;
-   }
+   const int param_base = inst->base_mrf;
  
     /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
     int coord_mask = (1 << coordinate_type->vector_elements) - 1;
@@ -874,7 +818,7 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
  bool
  vec4_visitor::is_high_sampler(src_reg sampler)
  {
-   if (devinfo->gen < 8 && !devinfo->is_haswell)
+   if (!devinfo->is_haswell)
        return false;
  
     return sampler.file != IMM || sampler.ud >= 16;
@@ -902,8 +846,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
     case ir_txl: opcode = SHADER_OPCODE_TXL; break;
     case ir_txd: opcode = SHADER_OPCODE_TXD; break;
     case ir_txf: opcode = SHADER_OPCODE_TXF; break;
-   case ir_txf_ms: opcode = (devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W :
-                             SHADER_OPCODE_TXF_CMS); break;
+   case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
     case ir_txs: opcode = SHADER_OPCODE_TXS; break;
     case ir_tg4: opcode = offset_value.file != BAD_FILE
                           ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
@@ -937,7 +880,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
      * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
      */
     inst->header_size =
-      (devinfo->gen < 5 || devinfo->gen >= 9 ||
+      (devinfo->gen < 5 ||
         inst->offset != 0 || op == ir_tg4 ||
         op == ir_texture_samples ||
         is_high_sampler(sampler_reg)) ? 1 : 0;
@@ -1705,11 +1648,6 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
           offset = src_reg(this, glsl_type::uint_type);
           emit_before(block, inst, ADD(dst_reg(offset), indirect,
                                        brw_imm_ud(reg_offset * 16)));
-      } else if (devinfo->gen >= 8) {
-         /* Store the offset in a GRF so we can send-from-GRF. */
-         offset = src_reg(this, glsl_type::uint_type);
-         emit_before(block, inst, MOV(dst_reg(offset),
-                                      brw_imm_ud(reg_offset * 16)));
        } else {
           offset = brw_imm_d(reg_offset * 16);
        }
author	Ian Romanick <ian.d.romanick@intel.com>
	Tue, 22 Sep 2020 20:09:56 +0000 (13:09 -0700)
committer	Ian Romanick <ian.d.romanick@intel.com>
	Mon, 28 Sep 2020 18:43:10 +0000 (11:43 -0700)
src/intel/compiler/brw_vec4.cpp		patch \| blob \| history
src/intel/compiler/brw_vec4.h		patch \| blob \| history
src/intel/compiler/brw_vec4_copy_propagation.cpp		patch \| blob \| history
src/intel/compiler/brw_vec4_generator.cpp		patch \| blob \| history
src/intel/compiler/brw_vec4_gs_visitor.cpp		patch \| blob \| history
src/intel/compiler/brw_vec4_nir.cpp		patch \| blob \| history
src/intel/compiler/brw_vec4_surface_builder.cpp		patch \| blob \| history
src/intel/compiler/brw_vec4_visitor.cpp		patch \| blob \| history