From: Ian Romanick Date: Tue, 22 Sep 2020 20:09:56 +0000 (-0700) Subject: intel/vec4: Remove all support for Gen8+ [v2] X-Git-Tag: upstream/21.0.0~4945 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2a49007411bb6617be8409f1adf557a6be4970ac;p=platform%2Fupstream%2Fmesa.git intel/vec4: Remove all support for Gen8+ [v2] v2: Restore the gen == 10 hunk in brw_compile_vs (around line 2940). This function is also used for scalar VS compiles. Squash in: intel/vec4: Reindent after removing Gen8+ support intel/vec4: Silence unused parameter warning in try_immediate_source Reviewed-by: Jason Ekstrand [v1] Reviewed-by: Matt Turner [v1] Reviewed-by: Kenneth Graunke [v1] Part-of: --- diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index ee91be0..878ee4e 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -1009,23 +1009,7 @@ vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst) #define IS_64BIT(reg) (reg.file != BAD_FILE && type_sz(reg.type) == 8) - /* From the Cherryview and Broadwell PRMs: - * - * "When source or destination datatype is 64b or operation is integer DWord - * multiply, DepCtrl must not be used." - * - * SKL PRMs don't include this restriction, however, gen7 seems to be - * affected, at least by the 64b restriction, since DepCtrl with double - * precision instructions seems to produce GPU hangs in some cases. - */ - if (devinfo->gen == 8 || gen_device_info_is_9lp(devinfo)) { - if (inst->opcode == BRW_OPCODE_MUL && - IS_DWORD(inst->src[0]) && - IS_DWORD(inst->src[1])) - return true; - } - - if (devinfo->gen >= 7 && devinfo->gen <= 8) { + if (devinfo->gen >= 7) { if (IS_64BIT(inst->dst) || IS_64BIT(inst->src[0]) || IS_64BIT(inst->src[1]) || IS_64BIT(inst->src[2])) return true; @@ -1034,11 +1018,6 @@ vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst) #undef IS_64BIT #undef IS_DWORD - if (devinfo->gen >= 8) { - if (inst->opcode == BRW_OPCODE_F32TO16) - return true; - } - /* * mlen: * In the presence of send messages, totally interrupt dependency @@ -1912,7 +1891,7 @@ vec4_visitor::lower_minmax() src_reg vec4_visitor::get_timestamp() { - assert(devinfo->gen >= 7); + assert(devinfo->gen == 7); src_reg ts = src_reg(brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_TIMESTAMP, diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h index 73c18b6..a6015b8 100644 --- a/src/intel/compiler/brw_vec4.h +++ b/src/intel/compiler/brw_vec4.h @@ -241,7 +241,6 @@ public: void fix_float_operands(src_reg op[3], nir_alu_instr *instr); src_reg fix_3src_operand(const src_reg &src); - src_reg resolve_source_modifiers(const src_reg &src); vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0, const src_reg &src1 = src_reg()); diff --git a/src/intel/compiler/brw_vec4_copy_propagation.cpp b/src/intel/compiler/brw_vec4_copy_propagation.cpp index 8fb5d1c..9e4637e 100644 --- a/src/intel/compiler/brw_vec4_copy_propagation.cpp +++ b/src/intel/compiler/brw_vec4_copy_propagation.cpp @@ -78,15 +78,6 @@ is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch) inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch))); } -static bool -is_logic_op(enum opcode opcode) -{ - return (opcode == BRW_OPCODE_AND || - opcode == BRW_OPCODE_OR || - opcode == BRW_OPCODE_XOR || - opcode == BRW_OPCODE_NOT); -} - /** * Get the origin of a copy as a single register if all components present in * the given readmask originate from the same register and have compatible @@ -132,8 +123,7 @@ get_copy_value(const copy_entry &entry, unsigned readmask) } static bool -try_constant_propagate(const struct gen_device_info *devinfo, - vec4_instruction *inst, +try_constant_propagate(vec4_instruction *inst, int arg, const copy_entry *entry) { /* For constant propagation, we only handle the same constant @@ -169,17 +159,13 @@ try_constant_propagate(const struct gen_device_info *devinfo, } if (inst->src[arg].abs) { - if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) || - !brw_abs_immediate(value.type, &value.as_brw_reg())) { + if (!brw_abs_immediate(value.type, &value.as_brw_reg())) return false; - } } if (inst->src[arg].negate) { - if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) || - !brw_negate_immediate(value.type, &value.as_brw_reg())) { + if (!brw_negate_immediate(value.type, &value.as_brw_reg())) return false; - } } value = swizzle(value, inst->src[arg].swizzle); @@ -200,9 +186,7 @@ try_constant_propagate(const struct gen_device_info *devinfo, case SHADER_OPCODE_POW: case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: - if (devinfo->gen < 8) break; - /* fallthrough */ case BRW_OPCODE_DP2: case BRW_OPCODE_DP3: case BRW_OPCODE_DP4: @@ -333,11 +317,10 @@ try_copy_propagate(const struct gen_device_info *devinfo, value.file != ATTR) return false; - /* In gen < 8 instructions that write 2 registers also need to read 2 - * registers. Make sure we don't break that restriction by copy - * propagating from a uniform. + /* Instructions that write 2 registers also need to read 2 registers. Make + * sure we don't break that restriction by copy propagating from a uniform. */ - if (devinfo->gen < 8 && inst->size_written > REG_SIZE && is_uniform(value)) + if (inst->size_written > REG_SIZE && is_uniform(value)) return false; /* There is a regioning restriction such that if execsize == width @@ -358,11 +341,6 @@ try_copy_propagate(const struct gen_device_info *devinfo, if (type_sz(value.type) != type_sz(inst->src[arg].type)) return false; - if (devinfo->gen >= 8 && (value.negate || value.abs) && - is_logic_op(inst->opcode)) { - return false; - } - if (inst->src[arg].offset % REG_SIZE || value.offset % REG_SIZE) return false; @@ -516,7 +494,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) inst->src[i].offset / REG_SIZE); const copy_entry &entry = entries[reg]; - if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry)) + if (do_constant_prop && try_constant_propagate(inst, i, &entry)) progress = true; else if (try_copy_propagate(devinfo, inst, i, &entry, attributes_per_reg)) progress = true; diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index d1aa40e..3afaaed 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -130,7 +130,7 @@ generate_tex(struct brw_codegen *p, case SHADER_OPCODE_TXD: if (inst->shadow_compare) { /* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */ - assert(devinfo->gen >= 8 || devinfo->is_haswell); + assert(devinfo->is_haswell); msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; @@ -139,10 +139,6 @@ generate_tex(struct brw_codegen *p, case SHADER_OPCODE_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; - case SHADER_OPCODE_TXF_CMS_W: - assert(devinfo->gen >= 9); - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W; - break; case SHADER_OPCODE_TXF_CMS: if (devinfo->gen >= 7) msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; @@ -234,12 +230,6 @@ generate_tex(struct brw_codegen *p, /* Set the texel offset bits in DWord 2. */ dw2 = inst->offset; - if (devinfo->gen >= 9) - /* SKL+ overloads BRW_SAMPLER_SIMD_MODE_SIMD4X2 to also do SIMD8D, - * based on bit 22 in the header. - */ - dw2 |= GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2; - /* The VS, DS, and FS stages have the g0.2 payload delivered as 0, * so header0.2 is 0 when g0 is copied. The HS and GS stages do * not, so we must set to to 0 to avoid setting undesirable bits @@ -472,29 +462,24 @@ generate_gs_set_vertex_count(struct brw_codegen *p, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - if (p->devinfo->gen >= 8) { - /* Move the vertex count into the second MRF for the EOT write. */ - brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD), - src); - } else { - /* If we think of the src and dst registers as composed of 8 DWORDs each, - * we want to pick up the contents of DWORDs 0 and 4 from src, truncate - * them to WORDs, and then pack them into DWORD 2 of dst. - * - * It's easier to get the EU to do this if we think of the src and dst - * registers as composed of 16 WORDS each; then, we want to pick up the - * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5 - * of dst. - * - * We can do that by the following EU instruction: - * - * mov (2) dst.4<1>:uw src<8;1,0>:uw { Align1, Q1, NoMask } - */ - brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, - suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4), - stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0)); - } + /* If we think of the src and dst registers as composed of 8 DWORDs each, + * we want to pick up the contents of DWORDs 0 and 4 from src, truncate + * them to WORDs, and then pack them into DWORD 2 of dst. + * + * It's easier to get the EU to do this if we think of the src and dst + * registers as composed of 16 WORDS each; then, we want to pick up the + * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5 + * of dst. + * + * We can do that by the following EU instruction: + * + * mov (2) dst.4<1>:uw src<8;1,0>:uw { Align1, Q1, NoMask } + */ + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, + suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4), + stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0)); + brw_pop_insn_state(p); } diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index 615b296..9ee2765 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -214,35 +214,17 @@ vec4_gs_visitor::emit_thread_end() */ int base_mrf = 1; - bool static_vertex_count = gs_prog_data->static_vertex_count != -1; - - /* If the previous instruction was a URB write, we don't need to issue - * a second one - we can just set the EOT bit on the previous write. - * - * Skip this on Gen8+ unless there's a static vertex count, as we also - * need to write the vertex count out, and combining the two may not be - * possible (or at least not straightforward). - */ - vec4_instruction *last = (vec4_instruction *) instructions.get_tail(); - if (last && last->opcode == GS_OPCODE_URB_WRITE && - !(INTEL_DEBUG & DEBUG_SHADER_TIME) && - devinfo->gen >= 8 && static_vertex_count) { - last->urb_write_flags = BRW_URB_WRITE_EOT | last->urb_write_flags; - return; - } - current_annotation = "thread end"; dst_reg mrf_reg(MRF, base_mrf); src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); vec4_instruction *inst = emit(MOV(mrf_reg, r0)); inst->force_writemask_all = true; - if (devinfo->gen < 8 || !static_vertex_count) - emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count); + emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count); if (INTEL_DEBUG & DEBUG_SHADER_TIME) emit_shader_time_end(); inst = emit(GS_OPCODE_THREAD_END); inst->base_mrf = base_mrf; - inst->mlen = devinfo->gen >= 8 && !static_vertex_count ? 2 : 1; + inst->mlen = 1; } @@ -279,12 +261,6 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete) vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE); inst->offset = gs_prog_data->control_data_header_size_hwords; - /* We need to increment Global Offset by 1 to make room for Broadwell's - * extra "Vertex Count" payload at the beginning of the URB entry. - */ - if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1) - inst->offset++; - inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; return inst; } @@ -398,13 +374,6 @@ vec4_gs_visitor::emit_control_data_bits() inst->force_writemask_all = true; inst = emit(GS_OPCODE_URB_WRITE); inst->urb_write_flags = urb_write_flags; - /* We need to increment Global Offset by 256-bits to make room for - * Broadwell's extra "Vertex Count" payload at the beginning of the - * URB entry. Since this is an OWord message, Global Offset is counted - * in 128-bit units, so we must set it to 2. - */ - if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1) - inst->offset = 2; inst->base_mrf = base_mrf; inst->mlen = 2; } diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 3825a84..a2b691b 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -283,10 +283,7 @@ static src_reg setup_imm_df(const vec4_builder &bld, double v) { const gen_device_info *devinfo = bld.shader->devinfo; - assert(devinfo->gen >= 7); - - if (devinfo->gen >= 8) - return brw_imm_df(v); + assert(devinfo->gen == 7); /* gen7.5 does not support DF immediates straighforward but the DIM * instruction allows to set the 64-bit immediate value. @@ -463,7 +460,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } case nir_intrinsic_store_ssbo: { - assert(devinfo->gen >= 7); + assert(devinfo->gen == 7); /* brw_nir_lower_mem_access_bit_sizes takes care of this */ assert(nir_src_bit_size(instr->src[0]) == 32); @@ -525,7 +522,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } case nir_intrinsic_load_ssbo: { - assert(devinfo->gen >= 7); + assert(devinfo->gen == 7); /* brw_nir_lower_mem_access_bit_sizes takes care of this */ assert(nir_dest_bit_size(instr->dest) == 32); @@ -867,16 +864,6 @@ emit_find_msb_using_lzd(const vec4_builder &bld, void vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src) { - /* BDW PRM vol 15 - workarounds: - * DF->f format conversion for Align16 has wrong emask calculation when - * source is immediate. - */ - if (devinfo->gen == 8 && dst.type == BRW_REGISTER_TYPE_F && - src.file == BRW_IMMEDIATE_VALUE) { - emit(MOV(dst, brw_imm_f(src.df))); - return; - } - enum opcode op; switch (dst.type) { case BRW_REGISTER_TYPE_D: @@ -932,8 +919,7 @@ vec4_visitor::emit_conversion_to_double(dst_reg dst, src_reg src) */ static int try_immediate_source(const nir_alu_instr *instr, src_reg *op, - bool try_src0_also, - ASSERTED const gen_device_info *devinfo) + bool try_src0_also) { unsigned idx; @@ -982,16 +968,8 @@ try_immediate_source(const nir_alu_instr *instr, src_reg *op, if (op[idx].abs) d = MAX2(-d, d); - if (op[idx].negate) { - /* On Gen8+ a negation source modifier on a logical operation means - * something different. Nothing should generate this, so assert that - * it does not occur. - */ - assert(devinfo->gen < 8 || (instr->op != nir_op_iand && - instr->op != nir_op_ior && - instr->op != nir_op_ixor)); + if (op[idx].negate) d = -d; - } op[idx] = retype(src_reg(brw_imm_d(d)), old_type); break; @@ -1146,7 +1124,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) switch (instr->op) { case nir_op_mov: - try_immediate_source(instr, &op[0], true, devinfo); + try_immediate_source(instr, &op[0], true); inst = emit(MOV(dst, op[0])); break; @@ -1197,7 +1175,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ case nir_op_fadd: - try_immediate_source(instr, op, true, devinfo); + try_immediate_source(instr, op, true); inst = emit(ADD(dst, op[0], op[1])); break; @@ -1208,42 +1186,39 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_fmul: - try_immediate_source(instr, op, true, devinfo); + try_immediate_source(instr, op, true); inst = emit(MUL(dst, op[0], op[1])); break; case nir_op_imul: { assert(nir_dest_bit_size(instr->dest.dest) < 64); - if (devinfo->gen < 8) { - /* For integer multiplication, the MUL uses the low 16 bits of one of - * the operands (src0 through SNB, src1 on IVB and later). The MACH - * accumulates in the contribution of the upper 16 bits of that - * operand. If we can determine that one of the args is in the low - * 16 bits, though, we can just emit a single MUL. - */ - if (nir_src_is_const(instr->src[0].src) && - nir_alu_instr_src_read_mask(instr, 0) == 1 && - const_src_fits_in_16_bits(instr->src[0].src, op[0].type)) { - if (devinfo->gen < 7) - emit(MUL(dst, op[0], op[1])); - else - emit(MUL(dst, op[1], op[0])); - } else if (nir_src_is_const(instr->src[1].src) && - nir_alu_instr_src_read_mask(instr, 1) == 1 && - const_src_fits_in_16_bits(instr->src[1].src, op[1].type)) { - if (devinfo->gen < 7) - emit(MUL(dst, op[1], op[0])); - else - emit(MUL(dst, op[0], op[1])); - } else { - struct brw_reg acc = retype(brw_acc_reg(8), dst.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(dst_null_d(), op[0], op[1])); - emit(MOV(dst, src_reg(acc))); - } + + /* For integer multiplication, the MUL uses the low 16 bits of one of + * the operands (src0 through SNB, src1 on IVB and later). The MACH + * accumulates in the contribution of the upper 16 bits of that + * operand. If we can determine that one of the args is in the low + * 16 bits, though, we can just emit a single MUL. + */ + if (nir_src_is_const(instr->src[0].src) && + nir_alu_instr_src_read_mask(instr, 0) == 1 && + const_src_fits_in_16_bits(instr->src[0].src, op[0].type)) { + if (devinfo->gen < 7) + emit(MUL(dst, op[0], op[1])); + else + emit(MUL(dst, op[1], op[0])); + } else if (nir_src_is_const(instr->src[1].src) && + nir_alu_instr_src_read_mask(instr, 1) == 1 && + const_src_fits_in_16_bits(instr->src[1].src, op[1].type)) { + if (devinfo->gen < 7) + emit(MUL(dst, op[1], op[0])); + else + emit(MUL(dst, op[0], op[1])); } else { - emit(MUL(dst, op[0], op[1])); + struct brw_reg acc = retype(brw_acc_reg(8), dst.type); + + emit(MUL(acc, op[0], op[1])); + emit(MACH(dst_null_d(), op[0], op[1])); + emit(MOV(dst, src_reg(acc))); } break; } @@ -1253,11 +1228,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) assert(nir_dest_bit_size(instr->dest.dest) < 64); struct brw_reg acc = retype(brw_acc_reg(8), dst.type); - if (devinfo->gen >= 8) - emit(MUL(acc, op[0], retype(op[1], BRW_REGISTER_TYPE_UW))); - else - emit(MUL(acc, op[0], op[1])); - + emit(MUL(acc, op[0], op[1])); emit(MACH(dst, op[0], op[1])); break; } @@ -1433,7 +1404,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ case nir_op_fmin: - try_immediate_source(instr, op, true, devinfo); + try_immediate_source(instr, op, true); inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]); break; @@ -1442,7 +1413,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ case nir_op_fmax: - try_immediate_source(instr, op, true, devinfo); + try_immediate_source(instr, op, true); inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]); break; @@ -1473,7 +1444,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) /* If the order of the sources is changed due to an immediate value, * then the condition must also be changed. */ - if (try_immediate_source(instr, op, true, devinfo) == 0) + if (try_immediate_source(instr, op, true) == 0) conditional_mod = brw_swap_cmod(conditional_mod); emit(CMP(dst, op[0], op[1], conditional_mod)); @@ -1533,39 +1504,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_inot: assert(nir_dest_bit_size(instr->dest.dest) < 64); - if (devinfo->gen >= 8) { - op[0] = resolve_source_modifiers(op[0]); - } emit(NOT(dst, op[0])); break; case nir_op_ixor: assert(nir_dest_bit_size(instr->dest.dest) < 64); - if (devinfo->gen >= 8) { - op[0] = resolve_source_modifiers(op[0]); - op[1] = resolve_source_modifiers(op[1]); - } - try_immediate_source(instr, op, true, devinfo); + try_immediate_source(instr, op, true); emit(XOR(dst, op[0], op[1])); break; case nir_op_ior: assert(nir_dest_bit_size(instr->dest.dest) < 64); - if (devinfo->gen >= 8) { - op[0] = resolve_source_modifiers(op[0]); - op[1] = resolve_source_modifiers(op[1]); - } - try_immediate_source(instr, op, true, devinfo); + try_immediate_source(instr, op, true); emit(OR(dst, op[0], op[1])); break; case nir_op_iand: assert(nir_dest_bit_size(instr->dest.dest) < 64); - if (devinfo->gen >= 8) { - op[0] = resolve_source_modifiers(op[0]); - op[1] = resolve_source_modifiers(op[1]); - } - try_immediate_source(instr, op, true, devinfo); + try_immediate_source(instr, op, true); emit(AND(dst, op[0], op[1])); break; @@ -1843,19 +1799,19 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_ishl: assert(nir_dest_bit_size(instr->dest.dest) < 64); - try_immediate_source(instr, op, false, devinfo); + try_immediate_source(instr, op, false); emit(SHL(dst, op[0], op[1])); break; case nir_op_ishr: assert(nir_dest_bit_size(instr->dest.dest) < 64); - try_immediate_source(instr, op, false, devinfo); + try_immediate_source(instr, op, false); emit(ASR(dst, op[0], op[1])); break; case nir_op_ushr: assert(nir_dest_bit_size(instr->dest.dest) < 64); - try_immediate_source(instr, op, false, devinfo); + try_immediate_source(instr, op, false); emit(SHR(dst, op[0], op[1])); break; @@ -1902,22 +1858,22 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_fdot_replicated2: - try_immediate_source(instr, op, true, devinfo); + try_immediate_source(instr, op, true); inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]); break; case nir_op_fdot_replicated3: - try_immediate_source(instr, op, true, devinfo); + try_immediate_source(instr, op, true); inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]); break; case nir_op_fdot_replicated4: - try_immediate_source(instr, op, true, devinfo); + try_immediate_source(instr, op, true); inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]); break; case nir_op_fdph_replicated: - try_immediate_source(instr, op, false, devinfo); + try_immediate_source(instr, op, false); inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]); break; diff --git a/src/intel/compiler/brw_vec4_surface_builder.cpp b/src/intel/compiler/brw_vec4_surface_builder.cpp index 7120558..5418f60 100644 --- a/src/intel/compiler/brw_vec4_surface_builder.cpp +++ b/src/intel/compiler/brw_vec4_surface_builder.cpp @@ -163,8 +163,7 @@ namespace brw { unsigned dims, unsigned size, brw_predicate pred) { - const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || - bld.shader->devinfo->is_haswell); + const bool has_simd4x2 = bld.shader->devinfo->is_haswell; emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(), emit_insert(bld, addr, dims, has_simd4x2), has_simd4x2 ? 1 : dims, @@ -185,8 +184,7 @@ namespace brw { unsigned dims, unsigned rsize, unsigned op, brw_predicate pred) { - const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || - bld.shader->devinfo->is_haswell); + const bool has_simd4x2 = bld.shader->devinfo->is_haswell; /* Zip the components of both sources, they are represented as the X * and Y components of the same vector. diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp index f18fd9e..094ec54 100644 --- a/src/intel/compiler/brw_vec4_visitor.cpp +++ b/src/intel/compiler/brw_vec4_visitor.cpp @@ -306,22 +306,9 @@ vec4_visitor::fix_3src_operand(const src_reg &src) } src_reg -vec4_visitor::resolve_source_modifiers(const src_reg &src) -{ - if (!src.abs && !src.negate) - return src; - - dst_reg resolved = dst_reg(this, glsl_type::ivec4_type); - resolved.type = src.type; - emit(MOV(resolved, src)); - - return src_reg(resolved); -} - -src_reg vec4_visitor::fix_math_operand(const src_reg &src) { - if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE) + if (devinfo->gen < 6 || src.file == BAD_FILE) return src; /* The gen6 math instruction ignores the source modifiers -- @@ -753,35 +740,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst, vec4_instruction *pull; - if (devinfo->gen >= 9) { - /* Gen9+ needs a message header in order to use SIMD4x2 mode */ - src_reg header(this, glsl_type::uvec4_type, 2); - - pull = new(mem_ctx) - vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9, - dst_reg(header)); - - if (before_inst) - emit_before(before_block, before_inst, pull); - else - emit(pull); - - dst_reg index_reg = retype(byte_offset(dst_reg(header), REG_SIZE), - offset_reg.type); - pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg); - - if (before_inst) - emit_before(before_block, before_inst, pull); - else - emit(pull); - - pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, - dst, - surf_index, - header); - pull->mlen = 2; - pull->header_size = 1; - } else if (devinfo->gen >= 7) { + if (devinfo->gen >= 7) { dst_reg grf_offset = dst_reg(this, glsl_type::uint_type); grf_offset.type = offset_reg.type; @@ -838,24 +797,9 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type, inst->base_mrf = 2; inst->src[1] = surface; inst->src[2] = brw_imm_ud(0); /* sampler */ + inst->mlen = 1; - int param_base; - - if (devinfo->gen >= 9) { - /* Gen9+ needs a message header in order to use SIMD4x2 mode */ - vec4_instruction *header_inst = new(mem_ctx) - vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9, - dst_reg(MRF, inst->base_mrf)); - - emit(header_inst); - - inst->mlen = 2; - inst->header_size = 1; - param_base = inst->base_mrf + 1; - } else { - inst->mlen = 1; - param_base = inst->base_mrf; - } + const int param_base = inst->base_mrf; /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */ int coord_mask = (1 << coordinate_type->vector_elements) - 1; @@ -874,7 +818,7 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type, bool vec4_visitor::is_high_sampler(src_reg sampler) { - if (devinfo->gen < 8 && !devinfo->is_haswell) + if (!devinfo->is_haswell) return false; return sampler.file != IMM || sampler.ud >= 16; @@ -902,8 +846,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op, case ir_txl: opcode = SHADER_OPCODE_TXL; break; case ir_txd: opcode = SHADER_OPCODE_TXD; break; case ir_txf: opcode = SHADER_OPCODE_TXF; break; - case ir_txf_ms: opcode = (devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W : - SHADER_OPCODE_TXF_CMS); break; + case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break; case ir_txs: opcode = SHADER_OPCODE_TXS; break; case ir_tg4: opcode = offset_value.file != BAD_FILE ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break; @@ -937,7 +880,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op, * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal */ inst->header_size = - (devinfo->gen < 5 || devinfo->gen >= 9 || + (devinfo->gen < 5 || inst->offset != 0 || op == ir_tg4 || op == ir_texture_samples || is_high_sampler(sampler_reg)) ? 1 : 0; @@ -1705,11 +1648,6 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, offset = src_reg(this, glsl_type::uint_type); emit_before(block, inst, ADD(dst_reg(offset), indirect, brw_imm_ud(reg_offset * 16))); - } else if (devinfo->gen >= 8) { - /* Store the offset in a GRF so we can send-from-GRF. */ - offset = src_reg(this, glsl_type::uint_type); - emit_before(block, inst, MOV(dst_reg(offset), - brw_imm_ud(reg_offset * 16))); } else { offset = brw_imm_d(reg_offset * 16); }