From 8531f943d9aac13489a02e5a5b4bfa381c465a44 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 1 Sep 2016 22:31:43 -0700 Subject: [PATCH] i965/vec4: Fix copy propagation for non-register-aligned regions. This prevents it from trying to propagate a copy through a register-misaligned region. MOV instructions with a misaligned destination shouldn't be treated as a direct GRF copy, because they only define the destination GRFs partially. Also fix the interference check implemented with is_channel_updated() to consider overlapping regions with different register offset to interfere, since the writemask check implemented in the function is only valid under the assumption that the source and destination regions are aligned component by component. Reviewed-by: Iago Toral Quiroga --- src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index fe76dea..545f4c7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -46,6 +46,7 @@ is_direct_copy(vec4_instruction *inst) return (inst->opcode == BRW_OPCODE_MOV && !inst->predicate && inst->dst.file == VGRF && + inst->dst.offset % REG_SIZE == 0 && !inst->dst.reladdr && !inst->src[0].reladdr && (inst->dst.type == inst->src[0].type || @@ -73,7 +74,8 @@ is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch) return false; return regions_overlap(*src, REG_SIZE, inst->dst, inst->size_written) && - inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch)); + (inst->dst.offset != src->offset || + inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch))); } static bool @@ -436,8 +438,9 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) inst->src[i].reladdr) continue; - /* We only handle single-register copies. */ - if (inst->size_read(i) != REG_SIZE) + /* We only handle register-aligned single GRF copies. */ + if (inst->size_read(i) != REG_SIZE || + inst->src[i].offset % REG_SIZE) continue; const unsigned reg = (alloc.offsets[inst->src[i].nr] + -- 2.7.4