From b278f65e1c5295794dcf08d100356e6ded6c1f32 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Thu, 6 Dec 2012 10:36:11 -0800
Subject: [PATCH] i965/fs: Add an instruction flag for choosing the flag subregister.

We're going to redo discard handling to track discards in the other flag
subregister, saving instructions in the discard and allowing predicated
jumps out to the end of the shader.

Reviewed-by: Kenneth Graunke
---
Review note: the ".f0.%d" conditional-mod suffix printed by
fs_visitor::dump_instruction() no longer ends in "\n", so the separator
and destination printed right after it stay on the same output line as
the opcode.

 src/mesa/drivers/dri/i965/brw_eu.c                       |  6 ++++++
 src/mesa/drivers/dri/i965/brw_eu.h                       |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp                     | 16 ++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h                       |  7 ++++++-
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp                |  9 +++++----
 .../drivers/dri/i965/brw_fs_schedule_instructions.cpp    | 16 ++++++++--------
 6 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index 46ada8d..360089c 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -94,6 +94,12 @@ void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
    p->current->header.destreg__conditionalmod = conditional;
 }
 
+void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg)
+{
+   p->current->bits2.da1.flag_reg_nr = reg;
+   p->current->bits2.da1.flag_subreg_nr = subreg;
+}
+
 void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
 {
    p->current->header.access_mode = access_mode;
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index e43b543..adefcfd 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -810,6 +810,7 @@ void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
 void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
 void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
 void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
+void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg);
 void brw_set_acc_write_control(struct brw_compile *p, GLuint value);
 
 void brw_init_compile(struct brw_context *, struct brw_compile *p,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d77a67e..dbf48f8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2222,6 +2222,12 @@ fs_visitor::remove_duplicate_mrf_writes()
 void
 fs_visitor::dump_instruction(fs_inst *inst)
 {
+   if (inst->predicate) {
+      printf("(%cf0.%d) ",
+             inst->predicate_inverse ? '-' : '+',
+             inst->flag_subreg);
+   }
+
    if (inst->opcode < ARRAY_SIZE(opcode_descs) &&
        opcode_descs[inst->opcode].name) {
       printf("%s", opcode_descs[inst->opcode].name);
@@ -2230,8 +2236,18 @@ fs_visitor::dump_instruction(fs_inst *inst)
    }
    if (inst->saturate)
       printf(".sat");
+   if (inst->conditional_mod) {
+      printf(".cmod");
+      if (!inst->predicate &&
+          (intel->gen < 5 || (inst->opcode != BRW_OPCODE_SEL &&
+                              inst->opcode != BRW_OPCODE_IF &&
+                              inst->opcode != BRW_OPCODE_WHILE))) {
+         printf(".f0.%d", inst->flag_subreg);
+      }
+   }
    printf(" ");
 
+
    switch (inst->dst.file) {
    case GRF:
       printf("vgrf%d", inst->dst.reg);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 51efc11..b60a37e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -165,6 +165,11 @@ public:
    bool saturate;
    int conditional_mod; /**< BRW_CONDITIONAL_* */
 
+   /* Chooses which flag subregister (f0.0 or f0.1) is used for conditional
+    * mod and predication.
+    */
+   uint8_t flag_subreg;
+
    int mlen; /**< SEND message length */
    int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
    uint32_t texture_offset; /**< Texture offset bitfield */
@@ -511,7 +516,7 @@ private:
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
                                                  struct brw_reg offset);
-   void generate_mov_dispatch_to_flags();
+   void generate_mov_dispatch_to_flags(fs_inst *inst);
 
    struct brw_context *brw;
    struct intel_context *intel;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 0750b86..b3d7f81 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -742,15 +742,15 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
  * Used only on Gen6 and above.
  */
 void
-fs_generator::generate_mov_dispatch_to_flags()
+fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst)
 {
-   struct brw_reg f0 = brw_flag_reg(0, 0);
+   struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg);
    struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
 
    assert (intel->gen >= 6);
    brw_push_insn_state(p);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, f0, g1);
+   brw_MOV(p, flags, g1);
    brw_pop_insn_state(p);
 }
 
@@ -915,6 +915,7 @@ fs_generator::generate_code(exec_list *instructions)
       brw_set_conditionalmod(p, inst->conditional_mod);
       brw_set_predicate_control(p, inst->predicate);
       brw_set_predicate_inverse(p, inst->predicate_inverse);
+      brw_set_flag_reg(p, 0, inst->flag_subreg);
       brw_set_saturate(p, inst->saturate);
       brw_set_mask_control(p, inst->force_writemask_all);
 
@@ -1121,7 +1122,7 @@ fs_generator::generate_code(exec_list *instructions)
          break;
 
       case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
-         generate_mov_dispatch_to_flags();
+         generate_mov_dispatch_to_flags(inst);
          break;
 
       case SHADER_OPCODE_SHADER_TIME_ADD:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index 6b7c412..96d1131 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -249,7 +249,7 @@ instruction_scheduler::calculate_deps()
 {
    schedule_node *last_grf_write[virtual_grf_count];
    schedule_node *last_mrf_write[BRW_MAX_MRF];
-   schedule_node *last_conditional_mod = NULL;
+   schedule_node *last_conditional_mod[2] = { NULL, NULL };
    /* Fixed HW registers are assumed to be separate from the virtual
     * GRFs, so they can be tracked separately.  We don't really write
     * to fixed GRFs much, so don't bother tracking them on a more
@@ -299,8 +299,8 @@ instruction_scheduler::calculate_deps()
       }
 
       if (inst->predicate) {
-         assert(last_conditional_mod);
-         add_dep(last_conditional_mod, n);
+         assert(last_conditional_mod[inst->flag_subreg]);
+         add_dep(last_conditional_mod[inst->flag_subreg], n);
       }
 
       /* write-after-write deps. */
@@ -339,15 +339,15 @@ instruction_scheduler::calculate_deps()
        */
       if (inst->conditional_mod ||
           inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
-         add_dep(last_conditional_mod, n, 0);
-         last_conditional_mod = n;
+         add_dep(last_conditional_mod[inst->flag_subreg], n, 0);
+         last_conditional_mod[inst->flag_subreg] = n;
       }
    }
 
    /* bottom-to-top dependencies: WAR */
    memset(last_grf_write, 0, sizeof(last_grf_write));
    memset(last_mrf_write, 0, sizeof(last_mrf_write));
-   last_conditional_mod = NULL;
+   memset(last_conditional_mod, 0, sizeof(last_conditional_mod));
    last_fixed_grf_write = NULL;
 
    exec_node *node;
@@ -383,7 +383,7 @@ instruction_scheduler::calculate_deps()
       }
 
       if (inst->predicate) {
-         add_dep(n, last_conditional_mod);
+         add_dep(n, last_conditional_mod[inst->flag_subreg]);
       }
 
       /* Update the things this instruction wrote, so earlier reads
@@ -422,7 +422,7 @@ instruction_scheduler::calculate_deps()
        */
       if (inst->conditional_mod ||
           inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
-         last_conditional_mod = n;
+         last_conditional_mod[inst->flag_subreg] = n;
       }
    }
 }
-- 
2.7.4