From ce3f46397d77141156f81dd7fcf06fb936e2b0ef Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 3 Feb 2015 21:12:28 +0200 Subject: [PATCH] i965/fs: Handle CMP.nz ... 0 and AND.nz ... 1 similarly in cmod propagation Espically on platforms that do not natively generate 0u and ~0u for Boolean results, we generate a lot of sequences where a CMP is followed by an AND with 1. emit_bool_to_cond_code does this, for example. On ILK, this results in a sequence like: add(8) g3<1>F g8<8,8,1>F -g4<0,1,0>F cmp.l.f0(8) g3<1>D g3<8,8,1>F 0F and.nz.f0(8) null g3<8,8,1>D 1D (+f0) iff(8) Jump: 6 The AND.nz is obviously redundant. By propagating the cmod, we can instead generate add.l.f0(8) null g8<8,8,1>F -g4<0,1,0>F (+f0) iff(8) Jump: 6 Existing code already handles the propagation from the CMP to the ADD. Shader-db results: GM45 (0x2A42): total instructions in shared programs: 3550829 -> 3550788 (-0.00%) instructions in affected programs: 10028 -> 9987 (-0.41%) helped: 24 Iron Lake (0x0046): total instructions in shared programs: 4993146 -> 4993105 (-0.00%) instructions in affected programs: 9675 -> 9634 (-0.42%) helped: 24 Ivy Bridge (0x0166): total instructions in shared programs: 6291870 -> 6291794 (-0.00%) instructions in affected programs: 17914 -> 17838 (-0.42%) helped: 48 Haswell (0x0426): total instructions in shared programs: 5779256 -> 5779180 (-0.00%) instructions in affected programs: 16694 -> 16618 (-0.46%) helped: 48 Broadwell (0x162E): total instructions in shared programs: 6823088 -> 6823014 (-0.00%) instructions in affected programs: 15824 -> 15750 (-0.47%) helped: 46 No chage on Sandy Bridge or on any platform when NIR is used. v2: Add unit tests suggested by Matt. Remove spurious writes_flag() check on scan_inst when scan_inst is known to be BRW_OPCODE_CMP (also suggested by Matt). v3: Fix some comments and remove some explicit int() casts in fs_reg constructors in the unit tests. Both suggested by Matt. Signed-off-by: Ian Romanick Reviewed-by: Matt Turner --- .../drivers/dri/i965/brw_fs_cmod_propagation.cpp | 31 +++++- .../drivers/dri/i965/test_fs_cmod_propagation.cpp | 105 +++++++++++++++++++++ 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp index d0ca2f9..1935f06 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp @@ -57,7 +57,8 @@ opt_cmod_propagation_local(bblock_t *block) foreach_inst_in_block_reverse_safe(fs_inst, inst, block) { ip--; - if ((inst->opcode != BRW_OPCODE_CMP && + if ((inst->opcode != BRW_OPCODE_AND && + inst->opcode != BRW_OPCODE_CMP && inst->opcode != BRW_OPCODE_MOV) || inst->predicate != BRW_PREDICATE_NONE || !inst->dst.is_null() || @@ -65,6 +66,19 @@ opt_cmod_propagation_local(bblock_t *block) inst->src[0].abs) continue; + /* Only an AND.NZ can be propagated. Many AND.Z instructions are + * generated (for ir_unop_not in fs_visitor::emit_bool_to_cond_code). + * Propagating those would require inverting the condition on the CMP. + * This changes both the flag value and the register destination of the + * CMP. That result may be used elsewhere, so we can't change its value + * on a whim. + */ + if (inst->opcode == BRW_OPCODE_AND && + !(inst->src[1].is_one() && + inst->conditional_mod == BRW_CONDITIONAL_NZ && + !inst->src[0].negate)) + continue; + if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) continue; @@ -80,6 +94,21 @@ opt_cmod_propagation_local(bblock_t *block) scan_inst->dst.reg_offset != inst->src[0].reg_offset) break; + /* This must be done before the dst.type check because the result + * type of the AND will always be D, but the result of the CMP + * could be anything. The assumption is that the AND is just + * figuring out what the result of the previous comparison was + * instead of doing a new comparison with a different type. + */ + if (inst->opcode == BRW_OPCODE_AND) { + if (scan_inst->opcode == BRW_OPCODE_CMP) { + inst->remove(block); + progress = true; + } + + break; + } + /* Comparisons operate differently for ints and floats */ if (scan_inst->dst.type != inst->dst.type) break; diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index cb92abf..1ce14f8 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -449,3 +449,108 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero) EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); } + +TEST_F(cmod_propagation_test, andnz_one) +{ + fs_reg dest = v->vgrf(glsl_type::int_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg zero(0.0f); + fs_reg one(1); + + v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero) + ->conditional_mod = BRW_CONDITIONAL_L; + v->emit(BRW_OPCODE_AND, v->reg_null_f, dest, one) + ->conditional_mod = BRW_CONDITIONAL_NZ; + + /* = Before = + * 0: cmp.l.f0(8) dest:F src0:F 0F + * 1: and.nz.f0(8) null:D dest:D 1D + * + * = After = + * 0: cmp.l.f0(8) dest:F src0:F 0F + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_TRUE(retype(dest, BRW_REGISTER_TYPE_F) + .equals(instruction(block0, 0)->dst)); +} + +TEST_F(cmod_propagation_test, andnz_non_one) +{ + fs_reg dest = v->vgrf(glsl_type::int_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg zero(0.0f); + fs_reg nonone(38); + + v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero) + ->conditional_mod = BRW_CONDITIONAL_L; + v->emit(BRW_OPCODE_AND, v->reg_null_f, dest, nonone) + ->conditional_mod = BRW_CONDITIONAL_NZ; + + /* = Before = + * 0: cmp.l.f0(8) dest:F src0:F 0F + * 1: and.nz.f0(8) null:D dest:D 38D + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, andz_one) +{ + fs_reg dest = v->vgrf(glsl_type::int_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg zero(0.0f); + fs_reg one(1); + + v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero) + ->conditional_mod = BRW_CONDITIONAL_L; + v->emit(BRW_OPCODE_AND, v->reg_null_f, dest, one) + ->conditional_mod = BRW_CONDITIONAL_Z; + + /* = Before = + * 0: cmp.l.f0(8) dest:F src0:F 0F + * 1: and.z.f0(8) null:D dest:D 1D + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod); +} -- 2.7.4