From 2a49007411bb6617be8409f1adf557a6be4970ac Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 22 Sep 2020 13:09:56 -0700
Subject: [PATCH] intel/vec4: Remove all support for Gen8+ [v2]

v2: Restore the gen == 10 hunk in brw_compile_vs (around line 2940),
since that function is also used for scalar VS compiles.  Also squash in:

    intel/vec4: Reindent after removing Gen8+ support
    intel/vec4: Silence unused parameter warning in try_immediate_source

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> [v1]
Reviewed-by: Matt Turner <mattst88@gmail.com> [v1]
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> [v1]
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6826>
---
 src/intel/compiler/brw_vec4.cpp                  |  25 +---
 src/intel/compiler/brw_vec4.h                    |   1 -
 src/intel/compiler/brw_vec4_copy_propagation.cpp |  36 ++----
 src/intel/compiler/brw_vec4_generator.cpp        |  53 +++------
 src/intel/compiler/brw_vec4_gs_visitor.cpp       |  35 +-----
 src/intel/compiler/brw_vec4_nir.cpp              | 140 ++++++++---------------
 src/intel/compiler/brw_vec4_surface_builder.cpp  |   6 +-
 src/intel/compiler/brw_vec4_visitor.cpp          |  76 ++----------
 8 files changed, 87 insertions(+), 285 deletions(-)

diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index ee91be0..878ee4e 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -1009,23 +1009,7 @@ vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst)
 
 #define IS_64BIT(reg) (reg.file != BAD_FILE && type_sz(reg.type) == 8)
 
-   /* From the Cherryview and Broadwell PRMs:
-    *
-    * "When source or destination datatype is 64b or operation is integer DWord
-    * multiply, DepCtrl must not be used."
-    *
-    * SKL PRMs don't include this restriction, however, gen7 seems to be
-    * affected, at least by the 64b restriction, since DepCtrl with double
-    * precision instructions seems to produce GPU hangs in some cases.
-    */
-   if (devinfo->gen == 8 || gen_device_info_is_9lp(devinfo)) {
-      if (inst->opcode == BRW_OPCODE_MUL &&
-         IS_DWORD(inst->src[0]) &&
-         IS_DWORD(inst->src[1]))
-         return true;
-   }
-
-   if (devinfo->gen >= 7 && devinfo->gen <= 8) {
+   if (devinfo->gen >= 7) {
       if (IS_64BIT(inst->dst) || IS_64BIT(inst->src[0]) ||
           IS_64BIT(inst->src[1]) || IS_64BIT(inst->src[2]))
       return true;
@@ -1034,11 +1018,6 @@ vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst)
 #undef IS_64BIT
 #undef IS_DWORD
 
-   if (devinfo->gen >= 8) {
-      if (inst->opcode == BRW_OPCODE_F32TO16)
-         return true;
-   }
-
    /*
     * mlen:
     * In the presence of send messages, totally interrupt dependency
@@ -1912,7 +1891,7 @@ vec4_visitor::lower_minmax()
 src_reg
 vec4_visitor::get_timestamp()
 {
-   assert(devinfo->gen >= 7);
+   assert(devinfo->gen == 7);
 
    src_reg ts = src_reg(brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                 BRW_ARF_TIMESTAMP,
diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h
index 73c18b6..a6015b8 100644
--- a/src/intel/compiler/brw_vec4.h
+++ b/src/intel/compiler/brw_vec4.h
@@ -241,7 +241,6 @@ public:
    void fix_float_operands(src_reg op[3], nir_alu_instr *instr);
 
    src_reg fix_3src_operand(const src_reg &src);
-   src_reg resolve_source_modifiers(const src_reg &src);
 
    vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                                const src_reg &src1 = src_reg());
diff --git a/src/intel/compiler/brw_vec4_copy_propagation.cpp b/src/intel/compiler/brw_vec4_copy_propagation.cpp
index 8fb5d1c..9e4637e 100644
--- a/src/intel/compiler/brw_vec4_copy_propagation.cpp
+++ b/src/intel/compiler/brw_vec4_copy_propagation.cpp
@@ -78,15 +78,6 @@ is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
            inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch)));
 }
 
-static bool
-is_logic_op(enum opcode opcode)
-{
-   return (opcode == BRW_OPCODE_AND ||
-           opcode == BRW_OPCODE_OR  ||
-           opcode == BRW_OPCODE_XOR ||
-           opcode == BRW_OPCODE_NOT);
-}
-
 /**
  * Get the origin of a copy as a single register if all components present in
  * the given readmask originate from the same register and have compatible
@@ -132,8 +123,7 @@ get_copy_value(const copy_entry &entry, unsigned readmask)
 }
 
 static bool
-try_constant_propagate(const struct gen_device_info *devinfo,
-                       vec4_instruction *inst,
+try_constant_propagate(vec4_instruction *inst,
                        int arg, const copy_entry *entry)
 {
    /* For constant propagation, we only handle the same constant
@@ -169,17 +159,13 @@ try_constant_propagate(const struct gen_device_info *devinfo,
    }
 
    if (inst->src[arg].abs) {
-      if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
-          !brw_abs_immediate(value.type, &value.as_brw_reg())) {
+      if (!brw_abs_immediate(value.type, &value.as_brw_reg()))
          return false;
-      }
    }
 
    if (inst->src[arg].negate) {
-      if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
-          !brw_negate_immediate(value.type, &value.as_brw_reg())) {
+      if (!brw_negate_immediate(value.type, &value.as_brw_reg()))
          return false;
-      }
    }
 
    value = swizzle(value, inst->src[arg].swizzle);
@@ -200,9 +186,7 @@ try_constant_propagate(const struct gen_device_info *devinfo,
    case SHADER_OPCODE_POW:
    case SHADER_OPCODE_INT_QUOTIENT:
    case SHADER_OPCODE_INT_REMAINDER:
-      if (devinfo->gen < 8)
          break;
-      /* fallthrough */
    case BRW_OPCODE_DP2:
    case BRW_OPCODE_DP3:
    case BRW_OPCODE_DP4:
@@ -333,11 +317,10 @@ try_copy_propagate(const struct gen_device_info *devinfo,
        value.file != ATTR)
       return false;
 
-   /* In gen < 8 instructions that write 2 registers also need to read 2
-    * registers. Make sure we don't break that restriction by copy
-    * propagating from a uniform.
+   /* Instructions that write 2 registers also need to read 2 registers. Make
+    * sure we don't break that restriction by copy propagating from a uniform.
     */
-   if (devinfo->gen < 8 && inst->size_written > REG_SIZE && is_uniform(value))
+   if (inst->size_written > REG_SIZE && is_uniform(value))
       return false;
 
    /* There is a regioning restriction such that if execsize == width
@@ -358,11 +341,6 @@ try_copy_propagate(const struct gen_device_info *devinfo,
    if (type_sz(value.type) != type_sz(inst->src[arg].type))
       return false;
 
-   if (devinfo->gen >= 8 && (value.negate || value.abs) &&
-       is_logic_op(inst->opcode)) {
-      return false;
-   }
-
    if (inst->src[arg].offset % REG_SIZE || value.offset % REG_SIZE)
       return false;
 
@@ -516,7 +494,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
                                inst->src[i].offset / REG_SIZE);
          const copy_entry &entry = entries[reg];
 
-         if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
+         if (do_constant_prop && try_constant_propagate(inst, i, &entry))
             progress = true;
          else if (try_copy_propagate(devinfo, inst, i, &entry, attributes_per_reg))
 	    progress = true;
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index d1aa40e..3afaaed 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -130,7 +130,7 @@ generate_tex(struct brw_codegen *p,
       case SHADER_OPCODE_TXD:
          if (inst->shadow_compare) {
             /* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
-            assert(devinfo->gen >= 8 || devinfo->is_haswell);
+            assert(devinfo->is_haswell);
             msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
          } else {
             msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
@@ -139,10 +139,6 @@ generate_tex(struct brw_codegen *p,
       case SHADER_OPCODE_TXF:
 	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
 	 break;
-      case SHADER_OPCODE_TXF_CMS_W:
-         assert(devinfo->gen >= 9);
-         msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
-         break;
       case SHADER_OPCODE_TXF_CMS:
          if (devinfo->gen >= 7)
             msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
@@ -234,12 +230,6 @@ generate_tex(struct brw_codegen *p,
             /* Set the texel offset bits in DWord 2. */
             dw2 = inst->offset;
 
-         if (devinfo->gen >= 9)
-            /* SKL+ overloads BRW_SAMPLER_SIMD_MODE_SIMD4X2 to also do SIMD8D,
-             * based on bit 22 in the header.
-             */
-            dw2 |= GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2;
-
          /* The VS, DS, and FS stages have the g0.2 payload delivered as 0,
           * so header0.2 is 0 when g0 is copied.  The HS and GS stages do
           * not, so we must set to to 0 to avoid setting undesirable bits
@@ -472,29 +462,24 @@ generate_gs_set_vertex_count(struct brw_codegen *p,
    brw_push_insn_state(p);
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 
-   if (p->devinfo->gen >= 8) {
-      /* Move the vertex count into the second MRF for the EOT write. */
-      brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD),
-              src);
-   } else {
-      /* If we think of the src and dst registers as composed of 8 DWORDs each,
-       * we want to pick up the contents of DWORDs 0 and 4 from src, truncate
-       * them to WORDs, and then pack them into DWORD 2 of dst.
-       *
-       * It's easier to get the EU to do this if we think of the src and dst
-       * registers as composed of 16 WORDS each; then, we want to pick up the
-       * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5
-       * of dst.
-       *
-       * We can do that by the following EU instruction:
-       *
-       *     mov (2) dst.4<1>:uw src<8;1,0>:uw   { Align1, Q1, NoMask }
-       */
-      brw_set_default_access_mode(p, BRW_ALIGN_1);
-      brw_MOV(p,
-              suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
-              stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
-   }
+   /* If we think of the src and dst registers as composed of 8 DWORDs each,
+    * we want to pick up the contents of DWORDs 0 and 4 from src, truncate
+    * them to WORDs, and then pack them into DWORD 2 of dst.
+    *
+    * It's easier to get the EU to do this if we think of the src and dst
+    * registers as composed of 16 WORDS each; then, we want to pick up the
+    * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5
+    * of dst.
+    *
+    * We can do that by the following EU instruction:
+    *
+    *     mov (2) dst.4<1>:uw src<8;1,0>:uw   { Align1, Q1, NoMask }
+    */
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_MOV(p,
+           suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
+           stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
+
    brw_pop_insn_state(p);
 }
 
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp
index 615b296..9ee2765 100644
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -214,35 +214,17 @@ vec4_gs_visitor::emit_thread_end()
     */
    int base_mrf = 1;
 
-   bool static_vertex_count = gs_prog_data->static_vertex_count != -1;
-
-   /* If the previous instruction was a URB write, we don't need to issue
-    * a second one - we can just set the EOT bit on the previous write.
-    *
-    * Skip this on Gen8+ unless there's a static vertex count, as we also
-    * need to write the vertex count out, and combining the two may not be
-    * possible (or at least not straightforward).
-    */
-   vec4_instruction *last = (vec4_instruction *) instructions.get_tail();
-   if (last && last->opcode == GS_OPCODE_URB_WRITE &&
-       !(INTEL_DEBUG & DEBUG_SHADER_TIME) &&
-       devinfo->gen >= 8 && static_vertex_count) {
-      last->urb_write_flags = BRW_URB_WRITE_EOT | last->urb_write_flags;
-      return;
-   }
-
    current_annotation = "thread end";
    dst_reg mrf_reg(MRF, base_mrf);
    src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
    vec4_instruction *inst = emit(MOV(mrf_reg, r0));
    inst->force_writemask_all = true;
-   if (devinfo->gen < 8 || !static_vertex_count)
-      emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
+   emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
       emit_shader_time_end();
    inst = emit(GS_OPCODE_THREAD_END);
    inst->base_mrf = base_mrf;
-   inst->mlen = devinfo->gen >= 8 && !static_vertex_count ? 2 : 1;
+   inst->mlen = 1;
 }
 
 
@@ -279,12 +261,6 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
    vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE);
    inst->offset = gs_prog_data->control_data_header_size_hwords;
 
-   /* We need to increment Global Offset by 1 to make room for Broadwell's
-    * extra "Vertex Count" payload at the beginning of the URB entry.
-    */
-   if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
-      inst->offset++;
-
    inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
    return inst;
 }
@@ -398,13 +374,6 @@ vec4_gs_visitor::emit_control_data_bits()
    inst->force_writemask_all = true;
    inst = emit(GS_OPCODE_URB_WRITE);
    inst->urb_write_flags = urb_write_flags;
-   /* We need to increment Global Offset by 256-bits to make room for
-    * Broadwell's extra "Vertex Count" payload at the beginning of the
-    * URB entry.  Since this is an OWord message, Global Offset is counted
-    * in 128-bit units, so we must set it to 2.
-    */
-   if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
-      inst->offset = 2;
    inst->base_mrf = base_mrf;
    inst->mlen = 2;
 }
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index 3825a84..a2b691b 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -283,10 +283,7 @@ static src_reg
 setup_imm_df(const vec4_builder &bld, double v)
 {
    const gen_device_info *devinfo = bld.shader->devinfo;
-   assert(devinfo->gen >= 7);
-
-   if (devinfo->gen >= 8)
-      return brw_imm_df(v);
+   assert(devinfo->gen == 7);
 
    /* gen7.5 does not support DF immediates straighforward but the DIM
     * instruction allows to set the 64-bit immediate value.
@@ -463,7 +460,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    }
 
    case nir_intrinsic_store_ssbo: {
-      assert(devinfo->gen >= 7);
+      assert(devinfo->gen == 7);
 
       /* brw_nir_lower_mem_access_bit_sizes takes care of this */
       assert(nir_src_bit_size(instr->src[0]) == 32);
@@ -525,7 +522,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    }
 
    case nir_intrinsic_load_ssbo: {
-      assert(devinfo->gen >= 7);
+      assert(devinfo->gen == 7);
 
       /* brw_nir_lower_mem_access_bit_sizes takes care of this */
       assert(nir_dest_bit_size(instr->dest) == 32);
@@ -867,16 +864,6 @@ emit_find_msb_using_lzd(const vec4_builder &bld,
 void
 vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src)
 {
-   /* BDW PRM vol 15 - workarounds:
-    * DF->f format conversion for Align16 has wrong emask calculation when
-    * source is immediate.
-    */
-   if (devinfo->gen == 8 && dst.type == BRW_REGISTER_TYPE_F &&
-       src.file == BRW_IMMEDIATE_VALUE) {
-      emit(MOV(dst, brw_imm_f(src.df)));
-      return;
-   }
-
    enum opcode op;
    switch (dst.type) {
    case BRW_REGISTER_TYPE_D:
@@ -932,8 +919,7 @@ vec4_visitor::emit_conversion_to_double(dst_reg dst, src_reg src)
  */
 static int
 try_immediate_source(const nir_alu_instr *instr, src_reg *op,
-                     bool try_src0_also,
-                     ASSERTED const gen_device_info *devinfo)
+                     bool try_src0_also)
 {
    unsigned idx;
 
@@ -982,16 +968,8 @@ try_immediate_source(const nir_alu_instr *instr, src_reg *op,
       if (op[idx].abs)
          d = MAX2(-d, d);
 
-      if (op[idx].negate) {
-         /* On Gen8+ a negation source modifier on a logical operation means
-          * something different.  Nothing should generate this, so assert that
-          * it does not occur.
-          */
-         assert(devinfo->gen < 8 || (instr->op != nir_op_iand &&
-                                     instr->op != nir_op_ior &&
-                                     instr->op != nir_op_ixor));
+      if (op[idx].negate)
          d = -d;
-      }
 
       op[idx] = retype(src_reg(brw_imm_d(d)), old_type);
       break;
@@ -1146,7 +1124,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
 
    switch (instr->op) {
    case nir_op_mov:
-      try_immediate_source(instr, &op[0], true, devinfo);
+      try_immediate_source(instr, &op[0], true);
       inst = emit(MOV(dst, op[0]));
       break;
 
@@ -1197,7 +1175,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
       /* fall through */
    case nir_op_fadd:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
       inst = emit(ADD(dst, op[0], op[1]));
       break;
 
@@ -1208,42 +1186,39 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       break;
 
    case nir_op_fmul:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
       inst = emit(MUL(dst, op[0], op[1]));
       break;
 
    case nir_op_imul: {
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      if (devinfo->gen < 8) {
-         /* For integer multiplication, the MUL uses the low 16 bits of one of
-          * the operands (src0 through SNB, src1 on IVB and later). The MACH
-          * accumulates in the contribution of the upper 16 bits of that
-          * operand. If we can determine that one of the args is in the low
-          * 16 bits, though, we can just emit a single MUL.
-          */
-         if (nir_src_is_const(instr->src[0].src) &&
-             nir_alu_instr_src_read_mask(instr, 0) == 1 &&
-             const_src_fits_in_16_bits(instr->src[0].src, op[0].type)) {
-            if (devinfo->gen < 7)
-               emit(MUL(dst, op[0], op[1]));
-            else
-               emit(MUL(dst, op[1], op[0]));
-         } else if (nir_src_is_const(instr->src[1].src) &&
-                    nir_alu_instr_src_read_mask(instr, 1) == 1 &&
-                    const_src_fits_in_16_bits(instr->src[1].src, op[1].type)) {
-            if (devinfo->gen < 7)
-               emit(MUL(dst, op[1], op[0]));
-            else
-               emit(MUL(dst, op[0], op[1]));
-         } else {
-            struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
-
-            emit(MUL(acc, op[0], op[1]));
-            emit(MACH(dst_null_d(), op[0], op[1]));
-            emit(MOV(dst, src_reg(acc)));
-         }
+
+      /* For integer multiplication, the MUL uses the low 16 bits of one of
+       * the operands (src0 through SNB, src1 on IVB and later). The MACH
+       * accumulates in the contribution of the upper 16 bits of that
+       * operand. If we can determine that one of the args is in the low
+       * 16 bits, though, we can just emit a single MUL.
+       */
+      if (nir_src_is_const(instr->src[0].src) &&
+          nir_alu_instr_src_read_mask(instr, 0) == 1 &&
+          const_src_fits_in_16_bits(instr->src[0].src, op[0].type)) {
+         if (devinfo->gen < 7)
+            emit(MUL(dst, op[0], op[1]));
+         else
+            emit(MUL(dst, op[1], op[0]));
+      } else if (nir_src_is_const(instr->src[1].src) &&
+                 nir_alu_instr_src_read_mask(instr, 1) == 1 &&
+                 const_src_fits_in_16_bits(instr->src[1].src, op[1].type)) {
+         if (devinfo->gen < 7)
+            emit(MUL(dst, op[1], op[0]));
+         else
+            emit(MUL(dst, op[0], op[1]));
       } else {
-	 emit(MUL(dst, op[0], op[1]));
+         struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
+
+         emit(MUL(acc, op[0], op[1]));
+         emit(MACH(dst_null_d(), op[0], op[1]));
+         emit(MOV(dst, src_reg(acc)));
       }
       break;
    }
@@ -1253,11 +1228,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
       struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
 
-      if (devinfo->gen >= 8)
-         emit(MUL(acc, op[0], retype(op[1], BRW_REGISTER_TYPE_UW)));
-      else
-         emit(MUL(acc, op[0], op[1]));
-
+      emit(MUL(acc, op[0], op[1]));
       emit(MACH(dst, op[0], op[1]));
       break;
    }
@@ -1433,7 +1404,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
       /* fall through */
    case nir_op_fmin:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
       inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]);
       break;
 
@@ -1442,7 +1413,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
       /* fall through */
    case nir_op_fmax:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
       inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]);
       break;
 
@@ -1473,7 +1444,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
          /* If the order of the sources is changed due to an immediate value,
           * then the condition must also be changed.
           */
-         if (try_immediate_source(instr, op, true, devinfo) == 0)
+         if (try_immediate_source(instr, op, true) == 0)
             conditional_mod = brw_swap_cmod(conditional_mod);
 
          emit(CMP(dst, op[0], op[1], conditional_mod));
@@ -1533,39 +1504,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
 
    case nir_op_inot:
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      if (devinfo->gen >= 8) {
-         op[0] = resolve_source_modifiers(op[0]);
-      }
       emit(NOT(dst, op[0]));
       break;
 
    case nir_op_ixor:
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      if (devinfo->gen >= 8) {
-         op[0] = resolve_source_modifiers(op[0]);
-         op[1] = resolve_source_modifiers(op[1]);
-      }
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
       emit(XOR(dst, op[0], op[1]));
       break;
 
    case nir_op_ior:
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      if (devinfo->gen >= 8) {
-         op[0] = resolve_source_modifiers(op[0]);
-         op[1] = resolve_source_modifiers(op[1]);
-      }
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
       emit(OR(dst, op[0], op[1]));
       break;
 
    case nir_op_iand:
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      if (devinfo->gen >= 8) {
-         op[0] = resolve_source_modifiers(op[0]);
-         op[1] = resolve_source_modifiers(op[1]);
-      }
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
       emit(AND(dst, op[0], op[1]));
       break;
 
@@ -1843,19 +1799,19 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
 
    case nir_op_ishl:
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      try_immediate_source(instr, op, false, devinfo);
+      try_immediate_source(instr, op, false);
       emit(SHL(dst, op[0], op[1]));
       break;
 
    case nir_op_ishr:
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      try_immediate_source(instr, op, false, devinfo);
+      try_immediate_source(instr, op, false);
       emit(ASR(dst, op[0], op[1]));
       break;
 
    case nir_op_ushr:
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      try_immediate_source(instr, op, false, devinfo);
+      try_immediate_source(instr, op, false);
       emit(SHR(dst, op[0], op[1]));
       break;
 
@@ -1902,22 +1858,22 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       break;
 
    case nir_op_fdot_replicated2:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
       inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
       break;
 
    case nir_op_fdot_replicated3:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
       inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
       break;
 
    case nir_op_fdot_replicated4:
-      try_immediate_source(instr, op, true, devinfo);
+      try_immediate_source(instr, op, true);
       inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
       break;
 
    case nir_op_fdph_replicated:
-      try_immediate_source(instr, op, false, devinfo);
+      try_immediate_source(instr, op, false);
       inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]);
       break;
 
diff --git a/src/intel/compiler/brw_vec4_surface_builder.cpp b/src/intel/compiler/brw_vec4_surface_builder.cpp
index 7120558..5418f60 100644
--- a/src/intel/compiler/brw_vec4_surface_builder.cpp
+++ b/src/intel/compiler/brw_vec4_surface_builder.cpp
@@ -163,8 +163,7 @@ namespace brw {
                          unsigned dims, unsigned size,
                          brw_predicate pred)
       {
-         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
-                                   bld.shader->devinfo->is_haswell);
+         const bool has_simd4x2 = bld.shader->devinfo->is_haswell;
          emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
                    emit_insert(bld, addr, dims, has_simd4x2),
                    has_simd4x2 ? 1 : dims,
@@ -185,8 +184,7 @@ namespace brw {
                           unsigned dims, unsigned rsize, unsigned op,
                           brw_predicate pred)
       {
-         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
-                                   bld.shader->devinfo->is_haswell);
+         const bool has_simd4x2 = bld.shader->devinfo->is_haswell;
 
          /* Zip the components of both sources, they are represented as the X
           * and Y components of the same vector.
diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp
index f18fd9e..094ec54 100644
--- a/src/intel/compiler/brw_vec4_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_visitor.cpp
@@ -306,22 +306,9 @@ vec4_visitor::fix_3src_operand(const src_reg &src)
 }
 
 src_reg
-vec4_visitor::resolve_source_modifiers(const src_reg &src)
-{
-   if (!src.abs && !src.negate)
-      return src;
-
-   dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
-   resolved.type = src.type;
-   emit(MOV(resolved, src));
-
-   return src_reg(resolved);
-}
-
-src_reg
 vec4_visitor::fix_math_operand(const src_reg &src)
 {
-   if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE)
+   if (devinfo->gen < 6 || src.file == BAD_FILE)
       return src;
 
    /* The gen6 math instruction ignores the source modifiers --
@@ -753,35 +740,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
 
    vec4_instruction *pull;
 
-   if (devinfo->gen >= 9) {
-      /* Gen9+ needs a message header in order to use SIMD4x2 mode */
-      src_reg header(this, glsl_type::uvec4_type, 2);
-
-      pull = new(mem_ctx)
-         vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
-                          dst_reg(header));
-
-      if (before_inst)
-         emit_before(before_block, before_inst, pull);
-      else
-         emit(pull);
-
-      dst_reg index_reg = retype(byte_offset(dst_reg(header), REG_SIZE),
-                                 offset_reg.type);
-      pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
-
-      if (before_inst)
-         emit_before(before_block, before_inst, pull);
-      else
-         emit(pull);
-
-      pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
-                                           dst,
-                                           surf_index,
-                                           header);
-      pull->mlen = 2;
-      pull->header_size = 1;
-   } else if (devinfo->gen >= 7) {
+   if (devinfo->gen >= 7) {
       dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
 
       grf_offset.type = offset_reg.type;
@@ -838,24 +797,9 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
    inst->base_mrf = 2;
    inst->src[1] = surface;
    inst->src[2] = brw_imm_ud(0); /* sampler */
+   inst->mlen = 1;
 
-   int param_base;
-
-   if (devinfo->gen >= 9) {
-      /* Gen9+ needs a message header in order to use SIMD4x2 mode */
-      vec4_instruction *header_inst = new(mem_ctx)
-         vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
-                          dst_reg(MRF, inst->base_mrf));
-
-      emit(header_inst);
-
-      inst->mlen = 2;
-      inst->header_size = 1;
-      param_base = inst->base_mrf + 1;
-   } else {
-      inst->mlen = 1;
-      param_base = inst->base_mrf;
-   }
+   const int param_base = inst->base_mrf;
 
    /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
    int coord_mask = (1 << coordinate_type->vector_elements) - 1;
@@ -874,7 +818,7 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
 bool
 vec4_visitor::is_high_sampler(src_reg sampler)
 {
-   if (devinfo->gen < 8 && !devinfo->is_haswell)
+   if (!devinfo->is_haswell)
       return false;
 
    return sampler.file != IMM || sampler.ud >= 16;
@@ -902,8 +846,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    case ir_txl: opcode = SHADER_OPCODE_TXL; break;
    case ir_txd: opcode = SHADER_OPCODE_TXD; break;
    case ir_txf: opcode = SHADER_OPCODE_TXF; break;
-   case ir_txf_ms: opcode = (devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W :
-                             SHADER_OPCODE_TXF_CMS); break;
+   case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
    case ir_txs: opcode = SHADER_OPCODE_TXS; break;
    case ir_tg4: opcode = offset_value.file != BAD_FILE
                          ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
@@ -937,7 +880,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
     * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
     */
    inst->header_size =
-      (devinfo->gen < 5 || devinfo->gen >= 9 ||
+      (devinfo->gen < 5 ||
        inst->offset != 0 || op == ir_tg4 ||
        op == ir_texture_samples ||
        is_high_sampler(sampler_reg)) ? 1 : 0;
@@ -1705,11 +1648,6 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
          offset = src_reg(this, glsl_type::uint_type);
          emit_before(block, inst, ADD(dst_reg(offset), indirect,
                                       brw_imm_ud(reg_offset * 16)));
-      } else if (devinfo->gen >= 8) {
-         /* Store the offset in a GRF so we can send-from-GRF. */
-         offset = src_reg(this, glsl_type::uint_type);
-         emit_before(block, inst, MOV(dst_reg(offset),
-                                      brw_imm_ud(reg_offset * 16)));
       } else {
          offset = brw_imm_d(reg_offset * 16);
       }
-- 
2.7.4