From: Eric Anholt Date: Thu, 4 Apr 2013 21:10:18 +0000 (-0700) Subject: i965/vs: Use GRFs for pull constant offsets on gen7. X-Git-Tag: mesa-9.2.1~1862 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d5f7aebac2b1afbc5023cd114174860d8d763d06;p=platform%2Fupstream%2Fmesa.git i965/vs: Use GRFs for pull constant offsets on gen7. This allows the computation of the offset to get written directly into the message source. shader-db results: total instructions in shared programs: 3308390 -> 3283025 (-0.77%) instructions in affected programs: 442998 -> 417633 (-5.73%) No difference in GLB2.7 low res (n=9). Reviewed-by: Matt Turner --- diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 3d07c36..a13f9dc 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -739,6 +739,7 @@ enum opcode { VS_OPCODE_SCRATCH_READ, VS_OPCODE_SCRATCH_WRITE, VS_OPCODE_PULL_CONSTANT_LOAD, + VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, }; #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 1a52039..b3bd1b9 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -498,6 +498,8 @@ brw_instruction_name(enum opcode op) return "scratch_write"; case VS_OPCODE_PULL_CONSTANT_LOAD: return "pull_constant_load"; + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: + return "pull_constant_load_gen7"; default: /* Yes, this leaks. It's in debug code, it should never occur, and if diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index e470ac8..67dd17a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -230,7 +230,13 @@ vec4_instruction::is_math() bool vec4_instruction::is_send_from_grf() { - return opcode == SHADER_OPCODE_SHADER_TIME_ADD; + switch (opcode) { + case SHADER_OPCODE_SHADER_TIME_ADD: + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: + return true; + default: + return false; + } } bool diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 8f130e1..e286925 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -544,6 +544,10 @@ private: struct brw_reg dst, struct brw_reg index, struct brw_reg offset); + void generate_pull_constant_load_gen7(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg surf_index, + struct brw_reg offset); struct brw_context *brw; struct intel_context *intel; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index e378f7f..963901c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -558,27 +558,11 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst, struct brw_reg index, struct brw_reg offset) { + assert(intel->gen <= 7); assert(index.file == BRW_IMMEDIATE_VALUE && index.type == BRW_REGISTER_TYPE_UD); uint32_t surf_index = index.dw1.ud; - if (intel->gen == 7) { - gen6_resolve_implied_move(p, &offset, inst->base_mrf); - brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, offset); - brw_set_sampler_message(p, insn, - surf_index, - 0, /* LD message ignores sampler unit */ - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - 1, /* rlen */ - 1, /* mlen */ - false, /* no header */ - BRW_SAMPLER_SIMD_MODE_SIMD4X2, - 0); - return; - } - struct brw_reg header = brw_vec8_grf(0, 0); gen6_resolve_implied_move(p, &header, inst->base_mrf); @@ -614,6 +598,29 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst, } void +vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg surf_index, + struct brw_reg offset) +{ + assert(surf_index.file == BRW_IMMEDIATE_VALUE && + surf_index.type == BRW_REGISTER_TYPE_UD); + + brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, dst); + brw_set_src0(p, insn, offset); + brw_set_sampler_message(p, insn, + surf_index.dw1.ud, + 0, /* LD message ignores sampler unit */ + GEN5_SAMPLER_MESSAGE_SAMPLE_LD, + 1, /* rlen */ + 1, /* mlen */ + false, /* no header */ + BRW_SAMPLER_SIMD_MODE_SIMD4X2, + 0); +} + +void vec4_generator::generate_vs_instruction(vec4_instruction *instruction, struct brw_reg dst, struct brw_reg *src) @@ -673,6 +680,10 @@ vec4_generator::generate_vs_instruction(vec4_instruction *instruction, generate_pull_constant_load(inst, dst, src[0], src[1]); break; + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: + generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); + break; + case SHADER_OPCODE_SHADER_TIME_ADD: brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index ca1cfe8..ed8e65de 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2880,10 +2880,20 @@ vec4_visitor::emit_pull_constant_load(vec4_instruction *inst, src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset); vec4_instruction *load; - load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD, - temp, index, offset); - load->base_mrf = 14; - load->mlen = 1; + if (intel->gen >= 7) { + dst_reg grf_offset = dst_reg(this, glsl_type::int_type); + grf_offset.type = offset.type; + emit_before(inst, MOV(grf_offset, offset)); + + load = new(mem_ctx) vec4_instruction(this, + VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, + temp, index, src_reg(grf_offset)); + } else { + load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD, + temp, index, offset); + load->base_mrf = 14; + load->mlen = 1; + } emit_before(inst, load); }