From 5f41d052bf53e32761fb528f4be99a1af3a33ebc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 19 Sep 2014 20:36:52 -0700 Subject: [PATCH] i965/fs: Make fs_reg::effective_width take fs_inst* instead of fs_visitor* Now that we have execution sizes, we can use that instead of the dispatch width. This way it also works for 8-wide instructions in SIMD16. i965/fs: Make effective_width a variable instead of a function i965/fs: Preserve effective width in constant propagation Signed-off-by: Jason Ekstrand Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 50 +++++++++++----------- src/mesa/drivers/dri/i965/brw_fs.h | 3 +- .../drivers/dri/i965/brw_fs_copy_propagation.cpp | 27 ++++++------ 3 files changed, 43 insertions(+), 37 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 3eb429f..73d4916 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -90,6 +90,30 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, } assert(this->exec_size != 0); + for (int i = 0; i < sources; ++i) { + switch (this->src[i].file) { + case BAD_FILE: + this->src[i].effective_width = 8; + break; + case GRF: + case HW_REG: + assert(this->src[i].width > 0); + if (this->src[i].width == 1) { + this->src[i].effective_width = this->exec_size; + } else { + this->src[i].effective_width = this->src[i].width; + } + break; + case IMM: + case UNIFORM: + this->src[i].effective_width = this->exec_size; + break; + default: + unreachable("Invalid source register file"); + } + } + this->dst.effective_width = this->exec_size; + this->conditional_mod = BRW_CONDITIONAL_NONE; /* This will be the case for almost all instructions. */ @@ -352,7 +376,7 @@ fs_visitor::LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources) * dealing with whole registers. If this ever changes, we can deal * with it later. */ - int size = src[i].effective_width(this) * type_sz(src[i].type); + int size = src[i].effective_width * type_sz(src[i].type); assert(size % 32 == 0); inst->regs_written += (size + 31) / 32; } @@ -580,28 +604,6 @@ fs_reg::equals(const fs_reg &r) const stride == r.stride); } -uint8_t -fs_reg::effective_width(const fs_visitor *v) const -{ - switch (this->file) { - case BAD_FILE: - return 8; - case UNIFORM: - case IMM: - assert(this->width == 1); - return v->dispatch_width; - case GRF: - case HW_REG: - assert(this->width > 1 && this->width <= v->dispatch_width); - assert(this->width % 8 == 0); - return this->width; - case MRF: - unreachable("MRF registers cannot be used as sources"); - default: - unreachable("Invalid register file"); - } -} - fs_reg & fs_reg::apply_stride(unsigned stride) { @@ -2994,7 +2996,7 @@ fs_visitor::lower_load_payload() fs_reg dst = inst->dst; for (int i = 0; i < inst->sources; i++) { - dst.width = inst->src[i].effective_width(this); + dst.width = inst->src[i].effective_width; dst.type = inst->src[i].type; if (inst->src[i].file == BAD_FILE) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 0e39802..e5202f0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -62,6 +62,7 @@ namespace brw { class fs_live_variables; } +class fs_inst; class fs_visitor; class fs_reg : public backend_reg { @@ -110,7 +111,7 @@ public: * effectively take on the width of the instruction in which they are * used. */ - uint8_t effective_width(const fs_visitor *v) const; + uint8_t effective_width; /** Register region horizontal stride */ uint8_t stride; diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index b4f4431..322debf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -450,9 +450,12 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) if (inst->src[i].negate || inst->src[i].abs) continue; + fs_reg val = entry->src; + val.effective_width = inst->src[i].effective_width; + switch (inst->opcode) { case BRW_OPCODE_MOV: - inst->src[i] = entry->src; + inst->src[i] = val; progress = true; break; @@ -468,7 +471,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) case BRW_OPCODE_SHR: case BRW_OPCODE_SUBB: if (i == 1) { - inst->src[i] = entry->src; + inst->src[i] = val; progress = true; } break; @@ -481,7 +484,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) case BRW_OPCODE_XOR: case BRW_OPCODE_ADDC: if (i == 1) { - inst->src[i] = entry->src; + inst->src[i] = val; progress = true; } else if (i == 0 && inst->src[1].file != IMM) { /* Fit this constant in by commuting the operands. @@ -494,7 +497,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) inst->src[1].type == BRW_REGISTER_TYPE_UD)) break; inst->src[0] = inst->src[1]; - inst->src[1] = entry->src; + inst->src[1] = val; progress = true; } break; @@ -502,7 +505,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) case BRW_OPCODE_CMP: case BRW_OPCODE_IF: if (i == 1) { - inst->src[i] = entry->src; + inst->src[i] = val; progress = true; } else if (i == 0 && inst->src[1].file != IMM) { enum brw_conditional_mod new_cmod; @@ -513,7 +516,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) * flipping the test */ inst->src[0] = inst->src[1]; - inst->src[1] = entry->src; + inst->src[1] = val; inst->conditional_mod = new_cmod; progress = true; } @@ -522,11 +525,11 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) case BRW_OPCODE_SEL: if (i == 1) { - inst->src[i] = entry->src; + inst->src[i] = val; progress = true; } else if (i == 0 && inst->src[1].file != IMM) { inst->src[0] = inst->src[1]; - inst->src[1] = entry->src; + inst->src[1] = val; /* If this was predicated, flipping operands means * we also need to flip the predicate. @@ -548,14 +551,14 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) assert(i == 0); if (inst->src[0].fixed_hw_reg.dw1.f != 0.0f) { inst->opcode = BRW_OPCODE_MOV; - inst->src[0] = entry->src; + inst->src[0] = val; inst->src[0].fixed_hw_reg.dw1.f = 1.0f / inst->src[0].fixed_hw_reg.dw1.f; progress = true; } break; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - inst->src[i] = entry->src; + inst->src[i] = val; progress = true; break; @@ -639,13 +642,13 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, inst->dst.file == GRF) { int offset = 0; for (int i = 0; i < inst->sources; i++) { - int regs_written = ((inst->src[i].effective_width(this) * + int regs_written = ((inst->src[i].effective_width * type_sz(inst->src[i].type)) + 31) / 32; if (inst->src[i].file == GRF) { acp_entry *entry = ralloc(copy_prop_ctx, acp_entry); entry->dst = inst->dst; entry->dst.reg_offset = offset; - entry->dst.width = inst->src[i].effective_width(this); + entry->dst.width = inst->src[i].effective_width; entry->src = inst->src[i]; entry->regs_written = regs_written; entry->opcode = inst->opcode; -- 2.7.4