From 56774e63028b2997a7d8c0abb5009a4c79f9a453 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Alejandro=20Pi=C3=B1eiro?= Date: Tue, 20 Oct 2015 13:08:09 +0200 Subject: [PATCH] i965/vec4: select predicate based on writemask for sel emissions Equivalent to commit 8ac3b525c but with sel operations. In this case we select the PredCtrl based on the writemask. This patch helps on cases like this: 1: cmp.l.f0.0 vgrf40.0.x:F, vgrf0.zzzz:F, vgrf7.xxxx:F 2: cmp.nz.f0.0 null:D, vgrf40.xxxx:D, 0D 3: (+f0.0) sel vgrf41.0.x:UD, vgrf6.xxxx:UD, vgrf5.xxxx:UD In this case, cmod propagation can't optimize instruction #2, because instructions #1 and #2 have different writemasks, and we can't update directly instruction #2 writemask because our code thinks that sel at instruction #3 reads all four channels of the flag, when it actually only reads .x. So, with this patch, the previous case becames this: 1: cmp.l.f0.0 vgrf40.0.x:F, vgrf0.zzzz:F, vgrf7.xxxx:F 2: cmp.nz.f0.0 null:D, vgrf40.xxxx:D, 0D 3: (+f0.0.x) sel vgrf41.0.x:UD, vgrf6.xxxx:UD, vgrf5.xxxx:UD Now only the x channel of the flag is used, allowing dead code eliminate to update the writemask at the second instruction: 1: cmp.l.f0.0 vgrf40.0.x:F, vgrf0.zzzz:F, vgrf7.xxxx:F 2: cmp.nz.f0.0 null.x:D, vgrf40.xxxx:D, 0D 3: (+f0.0.x) sel vgrf41.0.x:UD, vgrf6.xxxx:UD, vgrf5.xxxx:UD So now cmod propagation can simplify out #2: 1: cmp.l.f0.0 vgrf40.0.x:F, attr18.wwww:F, vgrf7.xxxx:F 2: (+f0.0.x) sel vgrf41.0.x:UD, vgrf6.xxxx:UD, vgrf5.xxxx:UD Shader-db numbers: total instructions in shared programs: 6235835 -> 6228008 (-0.13%) instructions in affected programs: 219850 -> 212023 (-3.56%) total loops in shared programs: 1979 -> 1979 (0.00%) helped: 1192 HURT: 0 --- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 8ca8ddb..b848810 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1407,7 +1407,23 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_bcsel: emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]); - inst->predicate = BRW_PREDICATE_NORMAL; + switch (dst.writemask) { + case WRITEMASK_X: + inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_X; + break; + case WRITEMASK_Y: + inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Y; + break; + case WRITEMASK_Z: + inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Z; + break; + case WRITEMASK_W: + inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_W; + break; + default: + inst->predicate = BRW_PREDICATE_NORMAL; + break; + } break; case nir_op_fdot_replicated2: -- 2.7.4