From 76c1086f2dfb37a1edf6d2df6eebbe11ccbfc50b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Mar 2015 10:17:32 -0700 Subject: [PATCH] i965: Change header_present to header_size in backend_instruction Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 4 +-- src/mesa/drivers/dri/i965/brw_fs.cpp | 18 ++++++------- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 20 +++++++-------- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 32 +++++++++++++----------- src/mesa/drivers/dri/i965/brw_shader.h | 4 ++- src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 10 ++++---- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 16 ++++++------ 9 files changed, 56 insertions(+), 52 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index 32919b1..c1b7609 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -88,7 +88,7 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct brw_reg &dst, inst->base_mrf = base_mrf; inst->mlen = msg_length; - inst->header_present = false; + inst->header_size = 0; insts.push_tail(inst); } @@ -104,7 +104,7 @@ brw_blorp_eu_emitter::emit_render_target_write(const struct brw_reg &src0, inst->src[0] = src0; inst->base_mrf = msg_reg_nr; inst->mlen = msg_length; - inst->header_present = use_header; + inst->header_size = use_header ? 2 : 0; inst->target = BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX; insts.push_tail(inst); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 1ca7ca6..22223e1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -430,7 +430,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, if (devinfo->gen < 7) { inst->base_mrf = 13; - inst->header_present = true; + inst->header_size = 1; if (devinfo->gen == 4) inst->mlen = 3; else @@ -478,7 +478,7 @@ fs_inst::equals(fs_inst *inst) const base_mrf == inst->base_mrf && target == inst->target && eot == inst->eot && - header_present == inst->header_present && + header_size == inst->header_size && shadow_compare == inst->shadow_compare && exec_size == inst->exec_size && offset == inst->offset); @@ -2611,10 +2611,10 @@ fs_visitor::opt_zero_samples() * parameters is avoided because it seems to cause a GPU hang but I * can't find any documentation indicating that this is expected. */ - while (inst->mlen > inst->header_present + dispatch_width / 8 && - load_payload->src[(inst->mlen - inst->header_present) / + while (inst->mlen > inst->header_size + dispatch_width / 8 && + load_payload->src[(inst->mlen - inst->header_size) / (dispatch_width / 8) + - inst->header_present - 1].is_zero()) { + inst->header_size - 1].is_zero()) { inst->mlen -= dispatch_width / 8; progress = true; } @@ -2683,7 +2683,7 @@ fs_visitor::opt_sampler_eot() * we have enough space, but it will make sure the dead code eliminator kills * the instruction that this will replace. */ - if (tex_inst->header_present) + if (tex_inst->header_size != 0) return true; fs_reg send_header = vgrf(load_payload->sources + 1); @@ -2709,7 +2709,7 @@ fs_visitor::opt_sampler_eot() new_load_payload->regs_written = load_payload->regs_written + 1; tex_inst->mlen++; - tex_inst->header_present = true; + tex_inst->header_size = 1; tex_inst->insert_before(cfg->blocks[cfg->num_blocks - 1], new_load_payload); tex_inst->src[0] = send_header; tex_inst->dst = reg_null_ud; @@ -3047,7 +3047,7 @@ fs_visitor::emit_repclear_shader() write->saturate = key->clamp_fragment_color; write->base_mrf = color_mrf; write->target = 0; - write->header_present = false; + write->header_size = 0; write->mlen = 1; } else { assume(key->nr_color_regions > 0); @@ -3056,7 +3056,7 @@ fs_visitor::emit_repclear_shader() write->saturate = key->clamp_fragment_color; write->base_mrf = base_mrf; write->target = i; - write->header_present = true; + write->header_size = 2; write->mlen = 3; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 9c4ed0b..ad38475 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -179,7 +179,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) a->regs_written == b->regs_written && a->base_mrf == b->base_mrf && a->eot == b->eot && - a->header_present == b->header_present && + a->header_size == b->header_size && a->shadow_compare == b->shadow_compare) : true) && operands_match(a, b, negate); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index d476c92..a99b7f7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -242,7 +242,7 @@ fs_generator::fire_fb_write(fs_inst *inst, 0, inst->eot, last_render_target, - inst->header_present); + inst->header_size != 0); brw_mark_surface_used(&prog_data->base, surf_index); } @@ -264,7 +264,7 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied * move, here's g1. */ - if (inst->header_present) { + if (inst->header_size != 0) { brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); @@ -413,7 +413,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst) 0, true, true, - inst->header_present); + inst->header_size != 0); } void @@ -714,7 +714,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src rlen = 0; } - assert(devinfo->gen < 7 || !inst->header_present || + assert(devinfo->gen < 7 || inst->header_size == 0 || src.file == BRW_GENERAL_REGISTER_FILE); assert(sampler_index.type == BRW_REGISTER_TYPE_UD); @@ -723,7 +723,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src * we need to set it up explicitly and load the offset bitfield. * Otherwise, we can use an implied move from g0 to the first message reg. */ - if (inst->header_present) { + if (inst->header_size != 0) { if (devinfo->gen < 6 && !inst->offset) { /* Set up an implied move from g0 to the MRF. */ src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); @@ -772,7 +772,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src msg_type, rlen, inst->mlen, - inst->header_present, + inst->header_size != 0, simd_mode, return_format); @@ -812,7 +812,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src msg_type, rlen, inst->mlen /* mlen */, - inst->header_present /* header */, + inst->header_size != 0 /* header */, simd_mode, return_format); @@ -1155,7 +1155,7 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg offset) { assert(devinfo->gen < 7); /* Should use the gen7 variant. */ - assert(inst->header_present); + assert(inst->header_size != 0); assert(inst->mlen); assert(index.file == BRW_IMMEDIATE_VALUE && @@ -1208,7 +1208,7 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst, msg_type, rlen, inst->mlen, - inst->header_present, + inst->header_size != 0, simd_mode, return_format); @@ -1225,7 +1225,7 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, /* Varying-offset pull constant loads are treated as a normal expression on * gen7, so the fact that it's a send message is hidden at the IR level. */ - assert(!inst->header_present); + assert(inst->header_size == 0); assert(!inst->mlen); assert(index.type == BRW_REGISTER_TYPE_UD); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index c080e9b..de0aec7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1583,7 +1583,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); inst->base_mrf = base_mrf; inst->mlen = mlen; - inst->header_present = true; + inst->header_size = 1; inst->regs_written = simd16 ? 8 : 4; if (simd16) { @@ -1654,7 +1654,7 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); inst->base_mrf = message.reg - 1; inst->mlen = msg_end.reg - inst->base_mrf; - inst->header_present = true; + inst->header_size = 1; inst->regs_written = 8; return inst; @@ -1677,7 +1677,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, bool has_offset) { int reg_width = dispatch_width / 8; - bool header_present = false; + unsigned header_size = 0; fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width); fs_reg msg_coords = message; @@ -1686,7 +1686,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, /* The offsets set up by the ir_texture visitor are in the * m1 header, so we can't go headerless. */ - header_present = true; + header_size = 1; message.reg--; } @@ -1789,7 +1789,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); inst->base_mrf = message.reg; inst->mlen = msg_end.reg - message.reg; - inst->header_present = header_present; + inst->header_size = header_size; inst->regs_written = 4 * reg_width; if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) { @@ -1818,7 +1818,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, fs_reg offset_value) { int reg_width = dispatch_width / 8; - bool header_present = false; + unsigned header_size = 0; fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE); for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) { @@ -1838,7 +1838,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, * The sampler index is only 4-bits, so for larger sampler numbers we * need to offset the Sampler State Pointer in the header. */ - header_present = true; + header_size = 1; sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); length++; } @@ -1997,7 +1997,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, int mlen; if (reg_width == 2) - mlen = length * reg_width - header_present; + mlen = length * reg_width - header_size; else mlen = length * reg_width; @@ -2029,7 +2029,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, fs_inst *inst = emit(opcode, dst, src_payload, sampler); inst->base_mrf = -1; inst->mlen = mlen; - inst->header_present = header_present; + inst->header_size = header_size; inst->regs_written = 4 * reg_width; if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) { @@ -2175,7 +2175,7 @@ fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler) fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler); inst->base_mrf = -1; inst->mlen = components * reg_width; - inst->header_present = false; + inst->header_size = 0; inst->regs_written = 4 * reg_width; /* we only care about one reg of * response, but the sampler always * writes 4/8 @@ -3395,7 +3395,7 @@ fs_visitor::emit_dummy_fs() write->base_mrf = 2; write->mlen = 4 * reg_width; } else { - write->header_present = true; + write->header_size = 2; write->base_mrf = 0; write->mlen = 2 + 4 * reg_width; } @@ -3727,7 +3727,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; this->current_annotation = "FB write header"; - bool header_present = true; + int header_size = 2; int reg_size = dispatch_width / 8; /* We can potentially have a message length of up to 15, so we have to set @@ -3747,12 +3747,14 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, (devinfo->is_haswell || devinfo->gen >= 8 || !prog_data->uses_kill) && color1.file == BAD_FILE && key->nr_color_regions == 1) { - header_present = false; + header_size = 0; } - if (header_present) + if (header_size != 0) { + assert(header_size == 2); /* Allocate 2 registers for a header */ length += 2; + } if (payload.aa_dest_stencil_reg) { sources[length] = fs_reg(GRF, alloc.allocate(1)); @@ -3851,7 +3853,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, } write->mlen = load->regs_written; - write->header_present = header_present; + write->header_size = header_size; if (prog_data->uses_kill) { write->predicate = BRW_PREDICATE_NORMAL; write->flag_subreg = 1; diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index ebce51d..59a0eff 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -192,12 +192,14 @@ struct backend_instruction { bool no_dd_check:1; bool saturate:1; bool shadow_compare:1; - bool header_present:1; /* Chooses which flag subregister (f0.0 or f0.1) is used for conditional * mod and predication. */ unsigned flag_subreg:1; + + /** The number of hardware registers used for a message header. */ + uint8_t header_size; }; #ifdef __cplusplus diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index a8d0e4a..2841d98 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -316,7 +316,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst) case SHADER_OPCODE_TXS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: - return inst->header_present ? 1 : 0; + return inst->header_size; default: unreachable("not reached"); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 9d37c93..ef77b8d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -326,7 +326,7 @@ vec4_generator::generate_tex(vec4_instruction *inst, * to set it up explicitly and load the offset bitfield. Otherwise, we can * use an implied move from g0 to the first message register. */ - if (inst->header_present) { + if (inst->header_size != 0) { if (devinfo->gen < 6 && !inst->offset) { /* Set up an implied move from g0 to the MRF. */ src = brw_vec8_grf(0, 0); @@ -391,7 +391,7 @@ vec4_generator::generate_tex(vec4_instruction *inst, msg_type, 1, /* response length */ inst->mlen, - inst->header_present, + inst->header_size != 0, BRW_SAMPLER_SIMD_MODE_SIMD4X2, return_format); @@ -431,7 +431,7 @@ vec4_generator::generate_tex(vec4_instruction *inst, msg_type, 1 /* rlen */, inst->mlen /* mlen */, - inst->header_present /* header */, + inst->header_size != 0 /* header */, BRW_SAMPLER_SIMD_MODE_SIMD4X2, return_format); @@ -1051,7 +1051,7 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ inst->mlen, - inst->header_present, + inst->header_size != 0, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); @@ -1083,7 +1083,7 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1 /* rlen */, inst->mlen, - inst->header_present, + inst->header_size != 0, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f64860d..5a60fe4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -50,7 +50,7 @@ vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst, this->shadow_compare = false; this->ir = NULL; this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS; - this->header_present = false; + this->header_size = 0; this->flag_subreg = 0; this->mlen = 0; this->base_mrf = 0; @@ -1340,7 +1340,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst, surf_index, header); pull->mlen = 2; - pull->header_present = true; + pull->header_size = 1; } else if (devinfo->gen >= 7) { dst_reg grf_offset = dst_reg(this, glsl_type::int_type); @@ -2654,19 +2654,19 @@ vec4_visitor::visit(ir_texture *ir) * - Gather channel selection * - Sampler indices too large to fit in a 4-bit value. */ - inst->header_present = - devinfo->gen < 5 || devinfo->gen >= 9 || - inst->offset != 0 || ir->op == ir_tg4 || - is_high_sampler(devinfo, sampler_reg); + inst->header_size = + (devinfo->gen < 5 || devinfo->gen >= 9 || + inst->offset != 0 || ir->op == ir_tg4 || + is_high_sampler(devinfo, sampler_reg)) ? 1 : 0; inst->base_mrf = 2; - inst->mlen = inst->header_present + 1; /* always at least one */ + inst->mlen = inst->header_size + 1; /* always at least one */ inst->dst.writemask = WRITEMASK_XYZW; inst->shadow_compare = ir->shadow_comparitor != NULL; inst->src[1] = sampler_reg; /* MRF for the first parameter */ - int param_base = inst->base_mrf + inst->header_present; + int param_base = inst->base_mrf + inst->header_size; if (ir->op == ir_txs || ir->op == ir_query_levels) { int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X; -- 2.7.4