From e392dd8237918fff27737964cc132c4caba5e6d2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 18 Feb 2022 12:49:06 -0500 Subject: [PATCH] pan/bi: Promote MUX to CSEL in the scheduler MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Helps scheduling, and makes scheduling more predictable when deciding between MUX and CSEL. total tuples in shared programs: 1523328 -> 1516256 (-0.46%) tuples in affected programs: 509800 -> 502728 (-1.39%) helped: 1977 HURT: 181 helped stats (abs) min: 1.0 max: 48.0 x̄: 3.71 x̃: 2 helped stats (rel) min: 0.04% max: 14.29% x̄: 1.98% x̃: 1.28% HURT stats (abs) min: 1.0 max: 5.0 x̄: 1.43 x̃: 1 HURT stats (rel) min: 0.14% max: 7.69% x̄: 1.40% x̃: 0.70% 95% mean confidence interval for tuples value: -3.47 -3.08 95% mean confidence interval for tuples %-change: -1.79% -1.60% Tuples are helped. total clauses in shared programs: 350552 -> 349906 (-0.18%) clauses in affected programs: 34839 -> 34193 (-1.85%) helped: 570 HURT: 49 helped stats (abs) min: 1.0 max: 16.0 x̄: 1.22 x̃: 1 helped stats (rel) min: 0.67% max: 20.00% x̄: 3.26% x̃: 2.22% HURT stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.92% max: 16.67% x̄: 4.31% x̃: 4.17% 95% mean confidence interval for clauses value: -1.13 -0.96 95% mean confidence interval for clauses %-change: -2.95% -2.38% Clauses are helped. total cycles in shared programs: 202589.37 -> 202512.25 (-0.04%) cycles in affected programs: 7644.46 -> 7567.33 (-1.01%) helped: 771 HURT: 147 helped stats (abs) min: 0.041665999999999315 max: 1.8333360000000027 x̄: 0.11 x̃: 0 helped stats (rel) min: 0.16% max: 14.29% x̄: 2.10% x̃: 1.35% HURT stats (abs) min: 0.041665999999999315 max: 0.3333340000000007 x̄: 0.07 x̃: 0 HURT stats (rel) min: 0.24% max: 7.41% x̄: 1.49% x̃: 1.11% 95% mean confidence interval for cycles value: -0.09 -0.07 95% mean confidence interval for cycles %-change: -1.69% -1.36% Cycles are helped. 
total arith in shared programs: 56755.96 -> 56585.50 (-0.30%) arith in affected programs: 18746.29 -> 18575.83 (-0.91%) helped: 1605 HURT: 352 helped stats (abs) min: 0.04166399999999726 max: 1.8333360000000027 x̄: 0.12 x̃: 0 helped stats (rel) min: 0.07% max: 20.00% x̄: 1.92% x̃: 1.12% HURT stats (abs) min: 0.041665999999999315 max: 0.3333340000000007 x̄: 0.06 x̃: 0 HURT stats (rel) min: 0.17% max: 33.33% x̄: 2.09% x̃: 1.08% 95% mean confidence interval for arith value: -0.09 -0.08 95% mean confidence interval for arith %-change: -1.34% -1.07% Arith are helped. total quadwords in shared programs: 1429737 -> 1424670 (-0.35%) quadwords in affected programs: 418175 -> 413108 (-1.21%) helped: 1682 HURT: 198 helped stats (abs) min: 1.0 max: 35.0 x̄: 3.17 x̃: 2 helped stats (rel) min: 0.04% max: 13.33% x̄: 1.72% x̃: 1.29% HURT stats (abs) min: 1.0 max: 5.0 x̄: 1.38 x̃: 1 HURT stats (rel) min: 0.15% max: 7.41% x̄: 1.30% x̃: 0.92% 95% mean confidence interval for quadwords value: -2.86 -2.53 95% mean confidence interval for quadwords %-change: -1.48% -1.32% Quadwords are helped. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bi_schedule.c | 58 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/panfrost/bifrost/bi_schedule.c b/src/panfrost/bifrost/bi_schedule.c index 4c6770e..33e3211 100644 --- a/src/panfrost/bifrost/bi_schedule.c +++ b/src/panfrost/bifrost/bi_schedule.c @@ -491,6 +491,58 @@ bi_can_iaddc(bi_instr *ins) } /* + * When MUX.i32 or MUX.v2i16 is used to multiplex entire sources, they can be + * replaced by CSEL as follows: + * + * MUX.neg(x, y, b) -> CSEL.s.lt(b, 0, x, y) + * MUX.int_zero(x, y, b) -> CSEL.i.eq(b, 0, x, y) + * MUX.fp_zero(x, y, b) -> CSEL.f.eq(b, 0, x, y) + * + * MUX.bit cannot be transformed like this. + * + * Note that MUX.v2i16 has partial support for swizzles, which CSEL.v2i16 lacks. + * So we must check the swizzles too. 
+ */
+static bool
+bi_can_csel(bi_instr *I)
+{
+        return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) && /* only these two MUX opcodes qualify */
+               (I->mux != BI_MUX_BIT) && /* bi_csel_for_mux() has no mapping for MUX.bit */
+               (I->src[0].swizzle == BI_SWIZZLE_H01) && /* every source must use the H01 swizzle; */
+               (I->src[1].swizzle == BI_SWIZZLE_H01) && /* NOTE(review): presumably H01 is the identity */
+               (I->src[2].swizzle == BI_SWIZZLE_H01);   /* swizzle -- confirm against the swizzle enum */
+}
+
+static enum bi_opcode
+bi_csel_for_mux(bool b32, enum bi_mux mux) /* b32: true selects the 32-bit CSEL opcode, false the v2x16 one */
+{
+        switch (mux) {
+        case BI_MUX_INT_ZERO: /* MUX.int_zero -> integer CSEL */
+                return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
+        case BI_MUX_NEG: /* MUX.neg -> signed CSEL */
+                return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
+        case BI_MUX_FP_ZERO: /* MUX.fp_zero -> floating-point CSEL */
+                return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
+        default: /* any other mode; bi_can_csel() filters out BI_MUX_BIT on the bi_take_instr path */
+                unreachable("No CSEL for MUX.bit");
+        }
+}
+
+static void
+bi_replace_mux_with_csel(bi_instr *I)
+{ /* Rewrites I in place from MUX(x, y, b) into CSEL(b, 0, x, y); the caller checks bi_can_csel() first. */
+        assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);
+        I->op = bi_csel_for_mux(I->op == BI_OPCODE_MUX_I32, I->mux); /* the argument reads the old op before it is overwritten */
+        I->cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ; /* MUX.neg compares with LT, the other modes with EQ */
+
+        bi_index vTrue = I->src[0], vFalse = I->src[1], cond = I->src[2]; /* snapshot the sources before they are reshuffled */
+
+        I->src[0] = cond;      /* former MUX source 2 */
+        I->src[1] = bi_zero(); /* the value cond is compared against */
+        I->src[2] = vTrue;     /* former MUX source 0 */
+        I->src[3] = vFalse;    /* former MUX source 1 */
+}
+/*
  * The encoding of *FADD.v2f16 only specifies a single abs flag. All abs
  * encodings are permitted by swapping operands; however, this scheme fails if
  * both operands are equal. Test for this case.
@@ -509,6 +561,10 @@ bi_can_fma(bi_instr *ins)
         if (bi_can_iaddc(ins))
                 return true;
 
+        /* +MUX -> *CSEL */
+        if (bi_can_csel(ins))
+                return true;
+
         /* *FADD.v2f16 has restricted abs modifiers, use +FADD.v2f16 instead */
         if (ins->op == BI_OPCODE_FADD_V2F16 && bi_impacted_abs(ins))
                 return false;
@@ -1220,6 +1276,8 @@ bi_take_instr(bi_context *ctx, struct bi_worklist st,
                 assert(bi_can_iaddc(instr));
                 instr->op = BI_OPCODE_IADDC_I32;
                 instr->src[2] = bi_zero();
+        } else if (fma && bi_can_csel(instr)) {
+                bi_replace_mux_with_csel(instr);
         }
 
         return instr;
-- 
2.7.4