* effectively inlining uniform values in the IR. The main benefit is register
* usage decrease leading to better SMT (hyperthreading). It's accomplished
* by targetting uniforms that determine whether a conditional branch is
- * taken.
+ * taken or a loop can be unrolled.
*
- * Only uniforms used in if conditions are analyzed.
+ * Only uniforms used in these places are analyzed:
+ * 1. if condition
+ * 2. loop terminator
+ * 3. init and update value of induction variable used in loop terminator
*
* nir_find_inlinable_uniforms finds uniforms that can be inlined and stores
* that information in shader_info.
* (uniforms must be lowered to load_ubo before calling this)
*/
-#include "compiler/nir/nir_builder.h"
+#include "nir_builder.h"
+#include "nir_loop_analyze.h"
/* Maximum value in shader_info::inlinable_uniform_dw_offsets[] */
#define MAX_OFFSET (UINT16_MAX * 4)
}
}
+static bool
+is_induction_variable(const nir_src *src, int component, nir_loop_info *info,
+ uint32_t *uni_offsets, unsigned *num_offsets)
+{
+ if (!src->is_ssa)
+ return false;
+
+ assert(component < src->ssa->num_components);
+
+ /* Return true for induction variable (ie. i in for loop) */
+ for (int i = 0; i < info->num_induction_vars; i++) {
+ nir_loop_induction_variable *var = info->induction_vars + i;
+ if (var->def == src->ssa) {
+ /* Induction variable should have constant initial value (ie. i = 0),
+ * constant update value (ie. i++) and constant end condition
+ * (ie. i < 10), so that we know the exact loop count for unrolling
+ * the loop.
+ *
+ * Add uniforms need to be inlined for this induction variable's
+ * initial and update value to be constant, for example:
+ *
+ * for (i = init; i < count; i += step)
+ *
+ * We collect uniform "init" and "step" here.
+ */
+ if (var->init_src) {
+ if (!src_only_uses_uniforms(var->init_src, component,
+ uni_offsets, num_offsets))
+ return false;
+ }
+
+ if (var->update_src) {
+ nir_alu_src *alu_src = var->update_src;
+ if (!src_only_uses_uniforms(&alu_src->src,
+ alu_src->swizzle[component],
+ uni_offsets, num_offsets))
+ return false;
+ }
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
static void
-add_inlinable_uniforms(const nir_src *cond, uint32_t *uni_offsets,
- unsigned *num_offsets)
+add_inlinable_uniforms(const nir_src *cond, nir_loop_info *info,
+ uint32_t *uni_offsets, unsigned *num_offsets)
{
unsigned new_num = *num_offsets;
/* If condition SSA is always scalar, so component is 0. */
unsigned component = 0;
+ /* Allow induction variable which means a loop terminator. */
+ if (info) {
+ nir_ssa_scalar cond_scalar = {cond->ssa, 0};
+
+ /* Limit terminator condition to loop unroll support case which is a simple
+ * comparison (ie. "i < count" is supported, but "i + 1 < count" is not).
+ */
+ if (nir_is_supported_terminator_condition(cond_scalar)) {
+ nir_alu_instr *alu = nir_instr_as_alu(cond->ssa->parent_instr);
+
+ /* One side of comparison is induction variable, the other side is
+ * only uniform.
+ */
+ for (int i = 0; i < 2; i++) {
+ if (is_induction_variable(&alu->src[i].src, alu->src[i].swizzle[0],
+ info, uni_offsets, &new_num)) {
+ cond = &alu->src[1 - i].src;
+ component = alu->src[1 - i].swizzle[0];
+ break;
+ }
+ }
+ }
+ }
+
/* Only update uniform number when all uniforms in the expression
* can be inlined. Partially inline uniforms can't lower if/loop.
*
*num_offsets = new_num;
}
+static void
+process_node(nir_cf_node *node, nir_loop_info *info,
+ uint32_t *uni_offsets, unsigned *num_offsets)
+{
+ switch (node->type) {
+ case nir_cf_node_if: {
+ nir_if *if_node = nir_cf_node_as_if(node);
+ const nir_src *cond = &if_node->condition;
+ add_inlinable_uniforms(cond, info, uni_offsets, num_offsets);
+
+ /* Do not pass loop info down so only alow induction variable
+ * in loop terminator "if":
+ *
+ * for (i = 0; true; i++)
+ * if (i == count)
+ * if (i == num)
+ * <no break>
+ * break
+ *
+ * so "num" won't be inlined due to the "if" is not a
+ * terminator.
+ */
+ info = NULL;
+
+ foreach_list_typed(nir_cf_node, nested_node, node, &if_node->then_list)
+ process_node(nested_node, info, uni_offsets, num_offsets);
+ foreach_list_typed(nir_cf_node, nested_node, node, &if_node->else_list)
+ process_node(nested_node, info, uni_offsets, num_offsets);
+ break;
+ }
+
+ case nir_cf_node_loop: {
+ nir_loop *loop = nir_cf_node_as_loop(node);
+
+ /* Replace loop info, no nested loop info currently:
+ *
+ * for (i = 0; i < count0; i++)
+ * for (j = 0; j < count1; j++)
+ * if (i == num)
+ *
+ * so "num" won't be inlined due to "i" is an induction
+ * variable of upper loop.
+ */
+ info = loop->info;
+
+ foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) {
+ bool is_terminator = false;
+ list_for_each_entry(nir_loop_terminator, terminator,
+ &info->loop_terminator_list,
+ loop_terminator_link) {
+ if (nested_node == &terminator->nif->cf_node) {
+ is_terminator = true;
+ break;
+ }
+ }
+
+ /* Allow induction variables for terminator "if" only:
+ *
+ * for (i = 0; i < count; i++)
+ * if (i == num)
+ * <no break>
+ *
+ * so "num" won't be inlined due to the "if" is not a
+ * terminator.
+ */
+ nir_loop_info *use_info = is_terminator ? info : NULL;
+ process_node(nested_node, use_info, uni_offsets, num_offsets);
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
void
nir_find_inlinable_uniforms(nir_shader *shader)
{
nir_foreach_function(function, shader) {
if (function->impl) {
- foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
- switch (node->type) {
- case nir_cf_node_if: {
- const nir_src *cond = &nir_cf_node_as_if(node)->condition;
- add_inlinable_uniforms(cond, uni_offsets, &num_offsets);
- break;
- }
-
- case nir_cf_node_loop:
- /* TODO: handle loops if we want to unroll them at draw time */
- break;
+ nir_metadata_require(function->impl, nir_metadata_loop_analysis, nir_var_all);
- default:
- break;
- }
- }
+ foreach_list_typed(nir_cf_node, node, node, &function->impl->body)
+ process_node(node, NULL, uni_offsets, &num_offsets);
}
}