From 64c251bb3afe6809911493c9a0830375702c2e40 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 4 Aug 2020 16:39:06 -0700 Subject: [PATCH] intel/fs: Combine constants for SEL instructions too It is very common to have bcsel where the second and third sources are both constants. This results in a situation where we would want to emit a SEL with two constant sources, but that's not allowed. Previously, we would load both constants into registers, then let constant propagation copy the last constant into the SEL instruction. This results in the constant using an entire SIMD register instead of a single channel. Instead, copy propagate both sources, then let the combine-constants pass do its thing. In the worst case, this stores the constant in a single channel of the SIMD register. In the best case, it reuses a value that was loaded into a register to satisfy another instruction. shader-db results: Tiger Lake, Ice Lake, and Skylake had similar results. (Ice Lake shown) total instructions in shared programs: 19951549 -> 19948709 (-0.01%) instructions in affected programs: 482795 -> 479955 (-0.59%) helped: 1184 / HURT: 3 total cycles in shared programs: 858584724 -> 858205341 (-0.04%) cycles in affected programs: 356168375 -> 355788992 (-0.11%) helped: 1448 / HURT: 1195 total spills in shared programs: 6569 -> 6255 (-4.78%) spills in affected programs: 912 -> 598 (-34.43%) helped: 58 / HURT: 0 total fills in shared programs: 8218 -> 7813 (-4.93%) fills in affected programs: 1570 -> 1165 (-25.80%) helped: 58 / HURT: 0 LOST: 6 GAINED: 16 Broadwell total instructions in shared programs: 17819660 -> 17819389 (<.01%) instructions in affected programs: 1078129 -> 1077858 (-0.03%) helped: 1067 / HURT: 304 total cycles in shared programs: 904722624 -> 905035016 (0.03%) cycles in affected programs: 362583117 -> 362895509 (0.09%) helped: 1381 / HURT: 1123 total spills in shared programs: 17884 -> 17922 (0.21%) spills in affected programs: 5088 -> 5126 (0.75%) helped: 55 / HURT: 152 total fills in shared programs: 25533 -> 26290 (2.96%) fills in affected programs: 12992 -> 13749 (5.83%) helped: 61 /HURT: 295 LOST: 7 GAINED: 24 Haswell total instructions in shared programs: 16678080 -> 16673976 (-0.02%) instructions in affected programs: 1162893 -> 1158789 (-0.35%) helped: 1584 / HURT: 7 total cycles in shared programs: 880180082 -> 879932525 (-0.03%) cycles in affected programs: 364067522 -> 363819965 (-0.07%) helped: 1226 / HURT: 976 total spills in shared programs: 14937 -> 14428 (-3.41%) spills in affected programs: 7866 -> 7357 (-6.47%) helped: 351 / HURT: 5 total fills in shared programs: 17572 -> 16975 (-3.40%) fills in affected programs: 11028 -> 10431 (-5.41%) helped: 350 / HURT: 3 LOST: 8 GAINED: 16 Ivy Bridge total instructions in shared programs: 15704044 -> 15703158 (<.01%) instructions in affected programs: 304513 -> 303627 (-0.29%) helped: 707 / HURT: 0 total cycles in shared programs: 433560149 -> 433471118 (-0.02%) cycles in affected programs: 19299650 -> 19210619 (-0.46%) helped: 687 / HURT: 395 LOST: 2 GAINED: 9 Sandy Bridge total instructions in shared programs: 13913386 -> 13912884 (<.01%) instructions in affected programs: 195687 -> 195185 (-0.26%) helped: 455 / HURT: 0 total cycles in shared programs: 741156272 -> 741136266 (<.01%) cycles in affected programs: 10934349 -> 10914343 (-0.18%) helped: 578 / HURT: 289 LOST: 9 GAINED: 4 Iron Lake and GM45 had similar results. (Iron Lake shown) total instructions in shared programs: 8364056 -> 8364042 (<.01%) instructions in affected programs: 5178 -> 5164 (-0.27%) helped: 10 / HURT: 0 total cycles in shared programs: 248759794 -> 248757940 (<.01%) cycles in affected programs: 4305246 -> 4303392 (-0.04%) helped: 183 / HURT: 24 fossil-db results: Tiger Lake Instructions in all programs: 156943594 -> 156802601 (-0.1%) Instructions helped: 20595 Instructions hurt: 23248 Cycles in all programs: 7512086950 -> 7528386387 (+0.2%) Cycles helped: 29531 Cycles hurt: 27837 Spills in all programs: 13500 -> 5643 (-58.2%) Spills helped: 394 Spills hurt: 22 Fills in all programs: 18943 -> 6306 (-66.7%) Fills helped: 394 Fills hurt: 11 Gained: 93 Lost: 76 Ice Lake Instructions in all programs: 141395899 -> 141249621 (-0.1%) Instructions helped: 30067 Instructions hurt: 3 Cycles in all programs: 9097127057 -> 9089668235 (-0.1%) Cycles helped: 32268 Cycles hurt: 24315 Spills in all programs: 13695 -> 7564 (-44.8%) Spills helped: 403 Fills in all programs: 18400 -> 8494 (-53.8%) Fills helped: 403 Gained: 114 Lost: 137 Skylake Instructions in all programs: 131948328 -> 131826063 (-0.1%) Instructions helped: 29968 Instructions hurt: 3 Cycles in all programs: 8794778440 -> 8793934844 (-0.0%) Cycles helped: 32705 Cycles hurt: 23575 Spills in all programs: 10526 -> 7039 (-33.1%) Spills helped: 403 Fills in all programs: 11025 -> 7728 (-29.9%) Fills helped: 403 Gained: 102 Lost: 250 Tested-by: Lionel Landwerlin Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/compiler/brw_fs_combine_constants.cpp | 133 ++++++++++++++++++++++-- src/intel/compiler/brw_fs_copy_propagation.cpp | 31 +++--- 2 files changed, 140 insertions(+), 24 deletions(-) diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp index 1412093..9847f1f 100644 --- a/src/intel/compiler/brw_fs_combine_constants.cpp +++ b/src/intel/compiler/brw_fs_combine_constants.cpp @@ -803,19 +803,21 @@ struct fs_inst_box { struct reg_link { DECLARE_RALLOC_CXX_OPERATORS(reg_link) - reg_link(fs_inst *inst, unsigned src, bool negate) - : inst(inst), src(src), negate(negate) {} + reg_link(fs_inst *inst, unsigned src, bool negate, enum interpreted_type type) + : inst(inst), src(src), negate(negate), type(type) {} struct exec_node link; fs_inst *inst; uint8_t src; bool negate; + enum interpreted_type type; }; static struct exec_node * -link(void *mem_ctx, fs_inst *inst, unsigned src, bool negate) +link(void *mem_ctx, fs_inst *inst, unsigned src, bool negate, + enum interpreted_type type) { - reg_link *l = new(mem_ctx) reg_link(inst, src, negate); + reg_link *l = new(mem_ctx) reg_link(inst, src, negate, type); return &l->link; } @@ -1107,6 +1109,7 @@ static void add_candidate_immediate(struct table *table, fs_inst *inst, unsigned ip, unsigned i, bool must_promote, + bool allow_one_constant, bblock_t *block, ASSERTED const struct intel_device_info *devinfo, void *const_ctx) @@ -1123,7 +1126,7 @@ add_candidate_immediate(struct table *table, fs_inst *inst, unsigned ip, v->bit_size = 8 * type_sz(inst->src[i].type); v->instr_index = box_idx; v->src = i; - v->allow_one_constant = false; + v->allow_one_constant = allow_one_constant; v->no_negations = false; switch (inst->src[i].type) { @@ -1151,6 +1154,18 @@ add_candidate_immediate(struct table *table, fs_inst *inst, unsigned ip, default: unreachable("not reached"); } + + /* It is safe to change the type of the operands of a select instruction + * that has no conditional modifier, no source modifiers, and no saturate + * modifer. + */ + if (inst->opcode == BRW_OPCODE_SEL && + inst->conditional_mod == BRW_CONDITIONAL_NONE && + !inst->src[0].negate && !inst->src[0].abs && + !inst->src[1].negate && !inst->src[1].abs && + !inst->saturate) { + v->type = either_type; + } } bool @@ -1189,7 +1204,7 @@ fs_visitor::opt_combine_constants() assert(inst->src[0].file != IMM); if (inst->src[1].file == IMM && devinfo->ver < 8) { - add_candidate_immediate(&table, inst, ip, 1, true, block, + add_candidate_immediate(&table, inst, ip, 1, true, false, block, devinfo, const_ctx); } @@ -1204,7 +1219,7 @@ fs_visitor::opt_combine_constants() if (can_promote_src_as_imm(devinfo, inst, i)) continue; - add_candidate_immediate(&table, inst, ip, i, true, block, + add_candidate_immediate(&table, inst, ip, i, true, false, block, devinfo, const_ctx); } @@ -1216,15 +1231,38 @@ fs_visitor::opt_combine_constants() if (inst->src[i].file != IMM) continue; - add_candidate_immediate(&table, inst, ip, i, true, block, + add_candidate_immediate(&table, inst, ip, i, true, false, block, devinfo, const_ctx); } break; + case BRW_OPCODE_SEL: + if (inst->src[0].file == IMM) { + /* It is possible to have src0 be immediate but src1 not be + * immediate for the non-commutative conditional modifiers (e.g., + * G). + */ + if (inst->conditional_mod == BRW_CONDITIONAL_NONE || + /* Only GE and L are commutative. */ + inst->conditional_mod == BRW_CONDITIONAL_GE || + inst->conditional_mod == BRW_CONDITIONAL_L) { + assert(inst->src[1].file == IMM); + + add_candidate_immediate(&table, inst, ip, 0, true, true, block, + devinfo, const_ctx); + add_candidate_immediate(&table, inst, ip, 1, true, true, block, + devinfo, const_ctx); + } else { + add_candidate_immediate(&table, inst, ip, 0, true, false, block, + devinfo, const_ctx); + } + } + break; + case BRW_OPCODE_MOV: if (could_coissue(devinfo, inst) && inst->src[0].file == IMM) { - add_candidate_immediate(&table, inst, ip, 0, false, block, + add_candidate_immediate(&table, inst, ip, 0, false, false, block, devinfo, const_ctx); } break; @@ -1235,7 +1273,7 @@ fs_visitor::opt_combine_constants() assert(inst->src[0].file != IMM); if (could_coissue(devinfo, inst) && inst->src[1].file == IMM) { - add_candidate_immediate(&table, inst, ip, 1, false, block, + add_candidate_immediate(&table, inst, ip, 1, false, false, block, devinfo, const_ctx); } break; @@ -1284,7 +1322,8 @@ fs_visitor::opt_combine_constants() const unsigned src = table.values[result->user_map[j].index].src; imm->uses->push_tail(link(const_ctx, ib->inst, src, - result->user_map[j].negate)); + result->user_map[j].negate, + result->user_map[j].type)); if (ib->must_promote) imm->must_promote = true; @@ -1402,6 +1441,31 @@ fs_visitor::opt_combine_constants() for (int i = 0; i < table.len; i++) { foreach_list_typed(reg_link, link, link, table.imm[i].uses) { fs_reg *reg = &link->inst->src[link->src]; + + if (link->inst->opcode == BRW_OPCODE_SEL) { + if (link->type == either_type) { + /* Do not change the register type. */ + } else if (link->type == integer_only) { + reg->type = brw_int_type(type_sz(reg->type), true); + } else { + assert(link->type == float_only); + + switch (type_sz(reg->type)) { + case 2: + reg->type = BRW_REGISTER_TYPE_HF; + break; + case 4: + reg->type = BRW_REGISTER_TYPE_F; + break; + case 8: + reg->type = BRW_REGISTER_TYPE_DF; + break; + default: + unreachable("Bad type size"); + } + } + } + #ifdef DEBUG switch (reg->type) { case BRW_REGISTER_TYPE_DF: @@ -1452,6 +1516,53 @@ fs_visitor::opt_combine_constants() } } + /* Fixup any SEL instructions that have src0 still as an immediate. Fixup + * the types of any SEL instruction that have a negation on one of the + * sources. Adding the negation may have changed the type of that source, + * so the other source (and destination) must be changed to match. + */ + for (unsigned i = 0; i < table.num_boxes; i++) { + fs_inst *inst = table.boxes[i].inst; + + if (inst->opcode != BRW_OPCODE_SEL) + continue; + + /* If both sources have negation, the types had better be the same! */ + assert(!inst->src[0].negate || !inst->src[1].negate || + inst->src[0].type == inst->src[1].type); + + /* If either source has a negation, force the type of the other source + * and the type of the result to be the same. + */ + if (inst->src[0].negate) { + inst->src[1].type = inst->src[0].type; + inst->dst.type = inst->src[0].type; + } + + if (inst->src[1].negate) { + inst->src[0].type = inst->src[1].type; + inst->dst.type = inst->src[1].type; + } + + if (inst->src[0].file != IMM) + continue; + + assert(inst->src[1].file != IMM); + assert(inst->conditional_mod == BRW_CONDITIONAL_NONE || + inst->conditional_mod == BRW_CONDITIONAL_GE || + inst->conditional_mod == BRW_CONDITIONAL_L); + + fs_reg temp = inst->src[0]; + inst->src[0] = inst->src[1]; + inst->src[1] = temp; + + /* If this was predicated, flipping operands means we also need to flip + * the predicate. + */ + if (inst->conditional_mod == BRW_CONDITIONAL_NONE) + inst->predicate_inverse = !inst->predicate_inverse; + } + if (debug) { for (int i = 0; i < table.len; i++) { struct imm *imm = &table.imm[i]; diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index ba0ebc5..37737e1 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -1040,21 +1040,26 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) if (i == 1) { inst->src[i] = val; progress = true; - } else if (i == 0 && inst->src[1].file != IMM && - (inst->conditional_mod == BRW_CONDITIONAL_NONE || - /* Only GE and L are commutative. */ - inst->conditional_mod == BRW_CONDITIONAL_GE || - inst->conditional_mod == BRW_CONDITIONAL_L)) { - inst->src[0] = inst->src[1]; - inst->src[1] = val; + } else if (i == 0) { + if (inst->src[1].file != IMM && + (inst->conditional_mod == BRW_CONDITIONAL_NONE || + /* Only GE and L are commutative. */ + inst->conditional_mod == BRW_CONDITIONAL_GE || + inst->conditional_mod == BRW_CONDITIONAL_L)) { + inst->src[0] = inst->src[1]; + inst->src[1] = val; - /* If this was predicated, flipping operands means - * we also need to flip the predicate. - */ - if (inst->conditional_mod == BRW_CONDITIONAL_NONE) { - inst->predicate_inverse = - !inst->predicate_inverse; + /* If this was predicated, flipping operands means + * we also need to flip the predicate. + */ + if (inst->conditional_mod == BRW_CONDITIONAL_NONE) { + inst->predicate_inverse = + !inst->predicate_inverse; + } + } else { + inst->src[0] = val; } + progress = true; } break; -- 2.7.4