}
static bool
-brw_nir_opt_peephole_ffma_block(nir_builder *b, nir_block *block)
+brw_nir_opt_peephole_ffma_instr(nir_builder *b,
+ nir_instr *instr,
+ UNUSED void *cb_data)
{
- bool progress = false;
-
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_alu)
- continue;
-
- nir_alu_instr *add = nir_instr_as_alu(instr);
- if (add->op != nir_op_fadd)
- continue;
-
- assert(add->dest.dest.is_ssa);
- if (add->exact)
- continue;
-
- assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
-
- /* This, is the case a + a. We would rather handle this with an
- * algebraic reduction than fuse it. Also, we want to only fuse
- * things where the multiply is used only once and, in this case,
- * it would be used twice by the same instruction.
- */
- if (add->src[0].src.ssa == add->src[1].src.ssa)
- continue;
+ if (instr->type != nir_instr_type_alu)
+ return false;
- nir_alu_instr *mul;
- uint8_t add_mul_src, swizzle[4];
- bool negate, abs;
- for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) {
- for (unsigned i = 0; i < 4; i++)
- swizzle[i] = i;
+ nir_alu_instr *add = nir_instr_as_alu(instr);
+ if (add->op != nir_op_fadd)
+ return false;
- negate = false;
- abs = false;
+ assert(add->dest.dest.is_ssa);
+ if (add->exact)
+ return false;
- mul = get_mul_for_src(&add->src[add_mul_src],
- add->dest.dest.ssa.num_components,
- swizzle, &negate, &abs);
+ assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
- if (mul != NULL)
- break;
- }
+ /* This is the case a + a. We would rather handle this with an
+ * algebraic reduction than fuse it. Also, we want to only fuse
+ * things where the multiply is used only once and, in this case,
+ * it would be used twice by the same instruction.
+ */
+ if (add->src[0].src.ssa == add->src[1].src.ssa)
+ return false;
- if (mul == NULL)
- continue;
+ nir_alu_instr *mul;
+ uint8_t add_mul_src, swizzle[4];
+ bool negate, abs;
+ for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) {
+ for (unsigned i = 0; i < 4; i++)
+ swizzle[i] = i;
- unsigned bit_size = add->dest.dest.ssa.bit_size;
+ negate = false;
+ abs = false;
- nir_ssa_def *mul_src[2];
- mul_src[0] = mul->src[0].src.ssa;
- mul_src[1] = mul->src[1].src.ssa;
+ mul = get_mul_for_src(&add->src[add_mul_src],
+ add->dest.dest.ssa.num_components,
+ swizzle, &negate, &abs);
- /* If any of the operands of the fmul and any of the fadd is a constant,
- * we bypass because it will be more efficient as the constants will be
- * propagated as operands, potentially saving two load_const instructions.
- */
- if (any_alu_src_is_a_constant(mul->src) &&
- any_alu_src_is_a_constant(add->src)) {
- continue;
- }
+ if (mul != NULL)
+ break;
+ }
- b->cursor = nir_before_instr(&add->instr);
+ if (mul == NULL)
+ return false;
- if (abs) {
- for (unsigned i = 0; i < 2; i++)
- mul_src[i] = nir_fabs(b, mul_src[i]);
- }
+ unsigned bit_size = add->dest.dest.ssa.bit_size;
- if (negate)
- mul_src[0] = nir_fneg(b, mul_src[0]);
+ nir_ssa_def *mul_src[2];
+ mul_src[0] = mul->src[0].src.ssa;
+ mul_src[1] = mul->src[1].src.ssa;
- nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma);
- ffma->dest.saturate = add->dest.saturate;
- ffma->dest.write_mask = add->dest.write_mask;
+ /* If any of the operands of the fmul and any of the fadd is a constant,
+ * we bypass because it will be more efficient as the constants will be
+ * propagated as operands, potentially saving two load_const instructions.
+ */
+ if (any_alu_src_is_a_constant(mul->src) &&
+ any_alu_src_is_a_constant(add->src)) {
+ return false;
+ }
- for (unsigned i = 0; i < 2; i++) {
- ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
- for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
- ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
- }
- nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src]);
+ b->cursor = nir_before_instr(&add->instr);
- assert(add->dest.dest.is_ssa);
+ if (abs) {
+ for (unsigned i = 0; i < 2; i++)
+ mul_src[i] = nir_fabs(b, mul_src[i]);
+ }
- nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
- add->dest.dest.ssa.num_components,
- bit_size, NULL);
- nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
- &ffma->dest.dest.ssa);
+ if (negate)
+ mul_src[0] = nir_fneg(b, mul_src[0]);
- nir_builder_instr_insert(b, &ffma->instr);
- assert(list_is_empty(&add->dest.dest.ssa.uses));
- nir_instr_remove(&add->instr);
+ nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma);
+ ffma->dest.saturate = add->dest.saturate;
+ ffma->dest.write_mask = add->dest.write_mask;
- progress = true;
+ for (unsigned i = 0; i < 2; i++) {
+ ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
+ for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
+ ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
}
+ nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src]);
- return progress;
-}
-
-static bool
-brw_nir_opt_peephole_ffma_impl(nir_function_impl *impl)
-{
- bool progress = false;
+ assert(add->dest.dest.is_ssa);
- nir_builder builder;
- nir_builder_init(&builder, impl);
+ nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
+ add->dest.dest.ssa.num_components,
+ bit_size, NULL);
+ nir_ssa_def_rewrite_uses(&add->dest.dest.ssa, &ffma->dest.dest.ssa);
- nir_foreach_block(block, impl) {
- progress |= brw_nir_opt_peephole_ffma_block(&builder, block);
- }
-
- if (progress) {
- nir_metadata_preserve(impl, nir_metadata_block_index |
- nir_metadata_dominance);
- } else {
- nir_metadata_preserve(impl, nir_metadata_all);
- }
+ nir_builder_instr_insert(b, &ffma->instr);
+ assert(list_is_empty(&add->dest.dest.ssa.uses));
+ nir_instr_remove(&add->instr);
- return progress;
+ return true;
}
bool
brw_nir_opt_peephole_ffma(nir_shader *shader)
{
- bool progress = false;
-
- nir_foreach_function(function, shader) {
- if (function->impl)
- progress |= brw_nir_opt_peephole_ffma_impl(function->impl);
- }
-
- return progress;
+ return nir_shader_instructions_pass(shader, brw_nir_opt_peephole_ffma_instr,
+ nir_metadata_block_index |
+ nir_metadata_dominance,
+ NULL);
}