From b031c643491a92a5574c7a4bd659df33f2d89bb6 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 17 Jan 2019 17:53:40 -0800 Subject: [PATCH] nir: Convert a bcsel with only phi node sources to a phi node v2: Remove the original ALU instruciton after all of its readers are modified to read the new ALU instruction. v3: Fix an issue where a bcsel that may not be executed on a loop iteration due to a break statement is converted to a phi (and therefore incorrectly "executed"). Noticed by Tim. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109216 Fixes: 8fb8ebfbb05 ("intel/compiler: More peephole select") Reviewed-by: Timothy Arceri --- src/compiler/nir/nir_opt_if.c | 220 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index be0a2d5..9afb901 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -542,6 +542,225 @@ opt_split_alu_of_phi(nir_builder *b, nir_loop *loop) return progress; } +/** + * Get the SSA value from a phi node that corresponds to a specific block + */ +static nir_ssa_def * +ssa_for_phi_from_block(nir_phi_instr *phi, nir_block *block) +{ + nir_foreach_phi_src(src, phi) { + if (src->pred == block) + return src->src.ssa; + } + + assert(!"Block is not a predecessor of phi."); + return NULL; +} + +/** + * Simplify a bcsel whose sources are all phi nodes from the loop header block + * + * bcsel instructions in a loop that meet the following criteria can be + * converted to phi nodes: + * + * - The loop has no continue instructions other than the "natural" continue + * at the bottom of the loop. + * + * - All of the sources of the bcsel are phi nodes in the header block of the + * loop. + * + * - The phi node representing the condition of the bcsel instruction chooses + * only constant values. + * + * The contant value from the condition will select one of the other sources + * when entered from outside the loop and the remaining source when entered + * from the continue block. Since each of these sources is also a phi node in + * the header block, the value of the phi node can be "evaluated." These + * evaluated phi nodes provide the sources for a new phi node. All users of + * the bcsel result are updated to use the phi node result. + * + * The replacement transforms loops like: + * + * vec1 32 ssa_7 = undefined + * vec1 32 ssa_8 = load_const (0x00000001) + * vec1 32 ssa_9 = load_const (0x000000c8) + * vec1 32 ssa_10 = load_const (0x00000000) + * // succs: block_1 + * loop { + * block block_1: + * // preds: block_0 block_4 + * vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14 + * vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15 + * vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25 + * vec1 32 ssa_14 = b32csel ssa_12, ssa_13, ssa_11 + * vec1 32 ssa_16 = ige32 ssa_14, ssa_9 + * ... + * vec1 32 ssa_15 = load_const (0xffffffff) + * ... + * vec1 32 ssa_25 = iadd ssa_14, ssa_8 + * // succs: block_1 + * } + * + * into: + * + * vec1 32 ssa_7 = undefined + * vec1 32 ssa_8 = load_const (0x00000001) + * vec1 32 ssa_9 = load_const (0x000000c8) + * vec1 32 ssa_10 = load_const (0x00000000) + * // succs: block_1 + * loop { + * block block_1: + * // preds: block_0 block_4 + * vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14 + * vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15 + * vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25 + * vec1 32 sss_26 = phi block_0: ssa_1, block_4: ssa_25 + * vec1 32 ssa_16 = ige32 ssa_26, ssa_9 + * ... + * vec1 32 ssa_15 = load_const (0xffffffff) + * ... + * vec1 32 ssa_25 = iadd ssa_26, ssa_8 + * // succs: block_1 + * } + * + * \note + * It may be possible modify this function to not require a phi node as the + * source of the bcsel that is selected when entering from outside the loop. + * The only restriction is that the source must be geneated outside the loop + * (since it will become the source of a phi node in the header block of the + * loop). + */ +static bool +opt_simplify_bcsel_of_phi(nir_builder *b, nir_loop *loop) +{ + bool progress = false; + nir_block *header_block = nir_loop_first_block(loop); + nir_block *const prev_block = + nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); + + /* It would be insane if this were not true */ + assert(_mesa_set_search(header_block->predecessors, prev_block)); + + /* The loop must have exactly one continue block which could be a block + * ending in a continue instruction or the "natural" continue from the + * last block in the loop back to the top. + */ + if (header_block->predecessors->entries != 2) + return false; + + /* We can move any bcsel that can guaranteed to execut on every iteration + * of a loop. For now this is accomplished by only taking bcsels from the + * header_block. In the future, this could be expanced to include any + * bcsel that must come before any break. + * + * For more details, see + * https://gitlab.freedesktop.org/mesa/mesa/merge_requests/170#note_110305 + */ + nir_foreach_instr_safe(instr, header_block) { + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *const bcsel = nir_instr_as_alu(instr); + if (bcsel->op != nir_op_bcsel && + bcsel->op != nir_op_b32csel && + bcsel->op != nir_op_fcsel) + continue; + + bool match = true; + for (unsigned i = 0; i < 3; i++) { + /* FINISHME: The abs and negate cases could be handled by adding + * move instructions at the bottom of the continue block and more + * phi nodes in the header_block. + */ + if (!bcsel->src[i].src.is_ssa || + bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi || + bcsel->src[i].src.ssa->parent_instr->block != header_block || + bcsel->src[i].negate || bcsel->src[i].abs) { + match = false; + break; + } + } + + if (!match) + continue; + + nir_phi_instr *const cond_phi = + nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr); + + uint32_t entry_val = 0, continue_val = 0; + if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi, + prev_block, + &entry_val, + &continue_val)) + continue; + + /* If they both execute or both don't execute, this is a job for + * nir_dead_cf, not this pass. + */ + if ((entry_val && continue_val) || (!entry_val && !continue_val)) + continue; + + const unsigned entry_src = entry_val ? 1 : 2; + const unsigned continue_src = entry_val ? 2 : 1; + + /* Create a new phi node that selects the value for prev_block from + * the bcsel source that is selected by entry_val and the value for + * continue_block from the other bcsel source. Both sources have + * already been verified to be phi nodes. + */ + nir_block *const continue_block = find_continue_block(loop); + nir_phi_instr *const phi = nir_phi_instr_create(b->shader); + nir_phi_src *phi_src; + + phi_src = ralloc(phi, nir_phi_src); + phi_src->pred = prev_block; + phi_src->src = + nir_src_for_ssa(ssa_for_phi_from_block(nir_instr_as_phi(bcsel->src[entry_src].src.ssa->parent_instr), + prev_block)); + exec_list_push_tail(&phi->srcs, &phi_src->node); + + phi_src = ralloc(phi, nir_phi_src); + phi_src->pred = continue_block; + phi_src->src = + nir_src_for_ssa(ssa_for_phi_from_block(nir_instr_as_phi(bcsel->src[continue_src].src.ssa->parent_instr), + continue_block)); + exec_list_push_tail(&phi->srcs, &phi_src->node); + + nir_ssa_dest_init(&phi->instr, + &phi->dest, + nir_dest_num_components(bcsel->dest.dest), + nir_dest_bit_size(bcsel->dest.dest), + NULL); + + b->cursor = nir_after_phis(header_block); + nir_builder_instr_insert(b, &phi->instr); + + /* Modify all readers of the bcsel instruction to read the result of + * the phi. + */ + nir_foreach_use_safe(use_src, &bcsel->dest.dest.ssa) { + nir_instr_rewrite_src(use_src->parent_instr, + use_src, + nir_src_for_ssa(&phi->dest.ssa)); + } + + nir_foreach_if_use_safe(use_src, &bcsel->dest.dest.ssa) { + nir_if_rewrite_condition(use_src->parent_if, + nir_src_for_ssa(&phi->dest.ssa)); + } + + /* Since the original bcsel instruction no longer has any readers, + * just remove it. + */ + nir_instr_remove_v(&bcsel->instr); + ralloc_free(bcsel); + + progress = true; + } + + return progress; +} + static bool is_block_empty(nir_block *block) { @@ -1109,6 +1328,7 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list) case nir_cf_node_loop: { nir_loop *loop = nir_cf_node_as_loop(cf_node); progress |= opt_if_cf_list(b, &loop->body); + progress |= opt_simplify_bcsel_of_phi(b, loop); progress |= opt_peel_loop_initial_if(loop); progress |= opt_if_loop_last_continue(loop); break; -- 2.7.4