From 6fc5dd5b6a3e4884b475bae7507ea59e59800e67 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 16 May 2016 01:03:43 -0700 Subject: [PATCH] i965/fs: Calculate the (un)spill block size correctly. Currently the spilling code attempts to guess the scratch message block size from the dispatch width of the shader, which is plain wrong for SIMD-lowered instructions (frequently but not exclusively encountered in SIMD32 shaders) or for instructions with register region data types of size other than 32 bit. Instead try to use the SIMD component size of the instruction which in some cases will allow the dataport to apply the correct channel mask to the scratch data read or written. In the spill case the block size needs to be clamped to the number of MRF registers reserved for spilling. In the unspill case I didn't even bother because we currently have no 100% accurate way to determine whether a source region is per-channel or whether it contains things like headers that don't respect channel boundaries -- That's fine, because the unspill is marked force_writemask_all we can just use the largest allowable scratch message size. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 4b92113..874013e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -942,8 +942,14 @@ fs_visitor::spill_reg(int spill_reg) inst->src[i].nr = unspill_dst.nr; inst->src[i].reg_offset = 0; + /* We read the largest power-of-two divisor of the register count + * (because only POT scratch read blocks are allowed by the + * hardware) up to the maximum supported block size. + * XXX - Bump the limit when the generator code is ready for + * 32-wide spills. + */ const unsigned width = - dispatch_width == 16 && regs_read % 2 == 0 ? 16 : 8; + MIN2(16, 1u << (ffs(MAX2(1, regs_read) * 8) - 1)); /* Set exec_all() on unspill messages under the (rather * pessimistic) assumption that there is no one-to-one @@ -974,8 +980,16 @@ fs_visitor::spill_reg(int spill_reg) inst->no_dd_clear = false; inst->no_dd_check = false; - const unsigned width = - dispatch_width == 16 && inst->regs_written % 2 == 0 ? 16 : 8; + /* Calculate the execution width of the scratch messages (which work + * in terms of 32 bit components so we have a fixed number of eight + * channels per spilled register). We attempt to write one + * exec_size-wide component of the variable at a time without + * exceeding the maximum number of (fake) MRF registers reserved for + * spills. + */ + const unsigned width = 8 * MIN2( + DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE), + spill_max_size(this)); /* Spills should only write data initialized by the instruction for * whichever channels are enabled in the excution mask. If that's -- 2.7.4