For instance, to load uniform data with the LSC we usually rely on
transpose messages, which have to execute in SIMD1. Those end up being
considered partial writes, so within loops their life span extends to
the whole loop, increasing register pressure.
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21867>
/**
 * Report whether this instruction is treated as a partial write of its
 * destination.
 *
 * In the rewritten ('+') form the checks run in this order:
 *  - predicated and the opcode is not BRW_OPCODE_SEL        -> true
 *  - dst.offset is not a multiple of REG_SIZE               -> true
 *  - the opcode is SHADER_OPCODE_SEND                       -> false
 *  - otherwise: true when exec_size * type_sz(dst.type) is below 32, or
 *    when the destination is not contiguous.
 *
 * The removed ('-') form evaluated the same four conditions as a single
 * OR-expression with no SEND exception, so a narrow SEND (e.g. SIMD1)
 * was reported as a partial write.
 *
 * NOTE(review): the literal 32 presumably is a byte count matching
 * REG_SIZE -- confirm against the definitions of type_sz() and REG_SIZE.
 */
bool
fs_inst::is_partial_write() const
{
- return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
- (this->exec_size * type_sz(this->dst.type)) < 32 ||
- !this->dst.is_contiguous() ||
- this->dst.offset % REG_SIZE != 0);
+ if (this->predicate && this->opcode != BRW_OPCODE_SEL)
+ return true;
+
+ if (this->dst.offset % REG_SIZE != 0)
+ return true;
+
+ /* SEND instructions always write whole registers */
+ if (this->opcode == SHADER_OPCODE_SEND)
+ return false;
+
+ return this->exec_size * type_sz(this->dst.type) < 32 ||
+ !this->dst.is_contiguous();
}
unsigned