int use_count; /* currently just updated/used by cp */
- /* Used during CP and RA stages. For collect and shader inputs/
- * outputs where we need a sequence of consecutive registers,
- * keep track of each src instruction's left (ie 'n-1') and right
- * (ie 'n+1') neighbor. The front-end must insert enough mov's
- * to ensure that each instruction has at most one left and at
- * most one right neighbor. During the copy-propagation pass,
- * we only remove mov's when we can preserve this constraint.
- * And during the RA stage, we use the neighbor information to
- * allocate a block of registers in one shot.
- *
- * TODO: maybe just add something like:
- * struct ir3_instruction_ref {
- * struct ir3_instruction *instr;
- * unsigned cnt;
- * }
- *
- * Or can we get away without the refcnt stuff? It seems like
- * it should be overkill.. the problem is if, potentially after
- * already eliminating some mov's, if you have a single mov that
- * needs to be grouped with its neighbors in two different
- * places (ex. shader output and a collect).
- */
- struct {
- struct ir3_instruction *left, *right;
- uint16_t left_cnt, right_cnt;
- } cp;
-
/* an instruction can reference at most one address register amongst
* its src/dst registers. Beyond that, you need to insert mov's.
*
int line;
};
-static inline struct ir3_instruction *
-ir3_neighbor_first(struct ir3_instruction *instr)
-{
- int cnt = 0;
- while (instr->cp.left) {
- instr = instr->cp.left;
- if (++cnt > 0xffff) {
- debug_assert(0);
- break;
- }
- }
- return instr;
-}
-
-static inline int ir3_neighbor_count(struct ir3_instruction *instr)
-{
- int num = 1;
-
- debug_assert(!instr->cp.left);
-
- while (instr->cp.right) {
- num++;
- instr = instr->cp.right;
- if (num > 0xffff) {
- debug_assert(0);
- break;
- }
- }
-
- return num;
-}
-
struct ir3 {
struct ir3_compiler *compiler;
gl_shader_stage type;
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
struct ir3_instruction *src, unsigned base, unsigned n)
{
- struct ir3_instruction *prev = NULL;
-
if ((n == 1) && (src->regs[0]->wrmask == 0x1) &&
/* setup_input needs ir3_split_dest to generate a SPLIT instruction */
src->opc != OPC_META_INPUT) {
__ssa_src(split, src, flags);
split->split.off = i + base;
- if (prev) {
- split->cp.left = prev;
- split->cp.left_cnt++;
- prev->cp.right = split;
- prev->cp.right_cnt++;
- }
- prev = split;
-
if (src->regs[0]->wrmask & (1 << (i + base)))
dst[j++] = split;
}
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
return false;
- /* If src is coming from fanout/split (ie. one component of a
- * texture fetch, etc) and we have constraints on swizzle of
- * destination, then skip it.
- *
- * We could possibly do a bit better, and copy-propagation if
- * we can CP all components that are being fanned out.
- */
- if (src_instr->opc == OPC_META_SPLIT) {
- if (!dst_instr)
- return false;
- if (dst_instr->opc == OPC_META_COLLECT)
- return false;
- if (dst_instr->cp.left || dst_instr->cp.right)
- return false;
- }
-
return true;
}
return false;
*/
if (src && is_tex_or_prefetch(src) && (src->regs[0]->wrmask > 1)) {
src->regs[0]->wrmask &= ~(1 << instr->split.off);
-
- /* prune no-longer needed right-neighbors. We could
- * probably do the same for left-neighbors (ie. tex
- * fetch that only need .yw components), but that
- * makes RA a bit more confusing than it already is
- */
- struct ir3_instruction *n = instr;
- while (n && n->cp.right)
- n = n->cp.right;
- while (n->flags & IR3_INSTR_UNUSED) {
- n = n->cp.left;
- if (!n)
- break;
- n->cp.right = NULL;
- }
}
}
printf("]");
}
- if (instr->cp.left) {
- printf(", left=_");
- printf("[");
- print_instr_name(instr->cp.left, false);
- printf("]");
- }
-
- if (instr->cp.right) {
- printf(", right=_");
- printf("[");
- print_instr_name(instr->cp.right, false);
- printf("]");
- }
-
if (instr->opc == OPC_META_SPLIT) {
printf(", off=%d", instr->split.off);
} else if (instr->opc == OPC_META_TEX_PREFETCH) {