From ac636f5adb57704696c781a17f11609fad9035ec Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 4 Aug 2021 14:49:30 -0400 Subject: [PATCH] pan/bi: Use FCLAMP pseudo op for clamp prop Map nir_op_fsat/etc to FCLAMP pseudo ops, instead of FADD. There are significantly fewer knobs on FCLAMP, meaning significantly fewer things to get wrong. This fixes two(!) classes of bugs: * Swizzles (failing to lower/compose swizzles on clamps) * Numerical bugs (incorrectly treating +0.0 as an additive identity) Signed-off-by: Alyssa Rosenzweig Cc: mesa-stable Part-of: --- src/panfrost/bifrost/ISA.xml | 20 ++++++++++++++++++++ src/panfrost/bifrost/bi_lower_swizzle.c | 22 ++++++++++++++++++---- src/panfrost/bifrost/bi_opt_mod_props.c | 13 ++++++------- src/panfrost/bifrost/bifrost_compile.c | 6 +++--- 4 files changed, 47 insertions(+), 14 deletions(-) diff --git a/src/panfrost/bifrost/ISA.xml b/src/panfrost/bifrost/ISA.xml index bda49a0..f49e44f 100644 --- a/src/panfrost/bifrost/ISA.xml +++ b/src/panfrost/bifrost/ISA.xml @@ -8313,4 +8313,24 @@ + + + + none + clamp_0_inf + clamp_m1_1 + clamp_0_1 + + + + + + + none + clamp_0_inf + clamp_m1_1 + clamp_0_1 + + + diff --git a/src/panfrost/bifrost/bi_lower_swizzle.c b/src/panfrost/bifrost/bi_lower_swizzle.c index d5c4e96..a0d5917 100644 --- a/src/panfrost/bifrost/bi_lower_swizzle.c +++ b/src/panfrost/bifrost/bi_lower_swizzle.c @@ -33,6 +33,10 @@ static void bi_lower_swizzle_16(bi_context *ctx, bi_instr *ins, unsigned src) { + /* Identity is ok */ + if (ins->src[src].swizzle == BI_SWIZZLE_H01) + return; + /* TODO: Use the opcode table and be a lot more methodical about this... */ switch (ins->op) { /* Some instructions used with 16-bit data never have swizzles */ @@ -66,14 +70,24 @@ bi_lower_swizzle_16(bi_context *ctx, bi_instr *ins, unsigned src) return; else break; + + /* We don't want to deal with reswizzling logic in modifier prop. Move + * the swizzle outside, it's easier for clamp propagation. */ + case BI_OPCODE_FCLAMP_V2F16: + { + bi_builder b = bi_init_builder(ctx, bi_after_instr(ins)); + bi_index dest = ins->dest[0]; + bi_index tmp = bi_temp(ctx); + + ins->dest[0] = tmp; + bi_swz_v2i16_to(&b, dest, bi_replace_index(ins->src[0], tmp)); + return; + } + default: return; } - /* Identity is ok (TODO: what about replicate only?) */ - if (ins->src[src].swizzle == BI_SWIZZLE_H01) - return; - /* If the instruction is scalar we can ignore the other component */ if (ins->dest[0].swizzle == BI_SWIZZLE_H00 && ins->src[src].swizzle == BI_SWIZZLE_H00) diff --git a/src/panfrost/bifrost/bi_opt_mod_props.c b/src/panfrost/bifrost/bi_opt_mod_props.c index 004f6ed..f4afdc1 100644 --- a/src/panfrost/bifrost/bi_opt_mod_props.c +++ b/src/panfrost/bifrost/bi_opt_mod_props.c @@ -157,19 +157,16 @@ bi_takes_clamp(bi_instr *I) } static bool -bi_is_fclamp(bi_instr *I) +bi_is_fclamp(enum bi_opcode op, enum bi_size size) { - return (I->op == BI_OPCODE_FADD_F32 || I->op == BI_OPCODE_FADD_V2F16) && - (!I->src[0].abs && !I->src[0].neg) && - (I->src[1].type == BI_INDEX_CONSTANT && I->src[1].value == 0) && - (I->clamp != BI_CLAMP_NONE); + return (size == BI_SIZE_32 && op == BI_OPCODE_FCLAMP_F32) || + (size == BI_SIZE_16 && op == BI_OPCODE_FCLAMP_V2F16); } static bool bi_optimizer_clamp(bi_instr *I, bi_instr *use) { - if (bi_opcode_props[use->op].size != bi_opcode_props[I->op].size) return false; - if (!bi_is_fclamp(use)) return false; + if (!bi_is_fclamp(use->op, bi_opcode_props[I->op].size)) return false; if (!bi_takes_clamp(I)) return false; /* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */ @@ -260,6 +257,8 @@ bi_lower_opt_instruction(bi_instr *I) switch (I->op) { case BI_OPCODE_FABSNEG_F32: case BI_OPCODE_FABSNEG_V2F16: + case BI_OPCODE_FCLAMP_F32: + case BI_OPCODE_FCLAMP_V2F16: I->op = (bi_opcode_props[I->op].size == BI_SIZE_32) ? BI_OPCODE_FADD_F32 : BI_OPCODE_FADD_V2F16; diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 0621ce8..d9d005d 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -1862,19 +1862,19 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) break; case nir_op_fsat: { - bi_instr *I = bi_fadd_to(b, sz, dst, s0, bi_negzero(), BI_ROUND_NONE); + bi_instr *I = bi_fclamp_to(b, sz, dst, s0); I->clamp = BI_CLAMP_CLAMP_0_1; break; } case nir_op_fsat_signed_mali: { - bi_instr *I = bi_fadd_to(b, sz, dst, s0, bi_negzero(), BI_ROUND_NONE); + bi_instr *I = bi_fclamp_to(b, sz, dst, s0); I->clamp = BI_CLAMP_CLAMP_M1_1; break; } case nir_op_fclamp_pos_mali: { - bi_instr *I = bi_fadd_to(b, sz, dst, s0, bi_negzero(), BI_ROUND_NONE); + bi_instr *I = bi_fclamp_to(b, sz, dst, s0); I->clamp = BI_CLAMP_CLAMP_0_INF; break; } -- 2.7.4