pan/bi: Use FABSNEG pseudo ops for modifier prop
author Alyssa Rosenzweig <alyssa@collabora.com>
Tue, 3 Aug 2021 23:16:52 +0000 (19:16 -0400)
committer Alyssa Rosenzweig <alyssa@collabora.com>
Wed, 11 Aug 2021 18:59:26 +0000 (14:59 -0400)
Simplifies pattern matching. This commit by itself fixes multiple
numerical issues -- the previous fabsneg check failed to check the round
mode or the sign of the zero. That will break Vulkan/OpenCL.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12205>

src/panfrost/bifrost/ISA.xml
src/panfrost/bifrost/bi_opt_mod_props.c
src/panfrost/bifrost/bifrost_compile.c
src/panfrost/bifrost/compiler.h

index 422ea26..bda49a0 100644 (file)
     <mod name="abs1" start="15" size="1" opt="abs"/>
   </ins>
 
+  <ins name="*FABSNEG.f32" pseudo="true">
+    <src start="0" mask="0xfb"/>
+    <mod name="neg0" start="7" size="1" opt="neg"/>
+    <mod name="abs0" start="12" size="1" opt="abs"/>
+    <mod name="widen0" size="2">
+      <opt>none</opt>
+      <opt>h0</opt>
+      <opt>h1</opt>
+    </mod>
+  </ins>
+
+  <ins name="*FABSNEG.v2f16" pseudo="true">
+    <src start="0" mask="0xfb"/>
+    <mod name="abs0" size="1" opt="abs"/>
+    <mod name="neg0" start="7" size="1" opt="neg"/>
+    <mod name="swz0" start="9" size="2" default="h01">
+      <opt>h00</opt>
+      <opt>h10</opt>
+      <opt>h01</opt>
+      <opt>h11</opt>
+    </mod>
+  </ins>
+
 </bifrost>
index 8c5f62c..004f6ed 100644 (file)
@@ -66,11 +66,10 @@ bi_takes_fneg(unsigned arch, bi_instr *I, unsigned s)
 }
 
 static bool
-bi_is_fabsneg(bi_instr *I)
+bi_is_fabsneg(enum bi_opcode op, enum bi_size size) /* is op the FABSNEG pseudo-op whose width matches size? */
 {
-        return (I->op == BI_OPCODE_FADD_F32 || I->op == BI_OPCODE_FADD_V2F16) &&
-                (I->src[1].type == BI_INDEX_CONSTANT && I->src[1].value == 0) &&
-                (I->clamp == BI_CLAMP_NONE);
+        return (size == BI_SIZE_32 && op == BI_OPCODE_FABSNEG_F32) || /* 32-bit: only FABSNEG.f32 qualifies */
+               (size == BI_SIZE_16 && op == BI_OPCODE_FABSNEG_V2F16); /* 16-bit: only FABSNEG.v2f16 qualifies */
 }
 
 static enum bi_swizzle
@@ -124,10 +123,9 @@ bi_opt_mod_prop_forward(bi_context *ctx)
                         if (!mod)
                                 continue;
 
-                        if (bi_opcode_props[mod->op].size != bi_opcode_props[I->op].size)
-                                continue;
+                        unsigned size = bi_opcode_props[I->op].size;
 
-                        if (bi_is_fabsneg(mod)) {
+                        if (bi_is_fabsneg(mod->op, size)) {
                                 if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s))
                                         continue;
 
@@ -253,3 +251,23 @@ bi_opt_mod_prop_backward(bi_context *ctx)
         free(uses);
         free(multiple);
 }
+
+/** Lower pseudo instructions that exist to simplify the optimizer.
+ *
+ * FABSNEG.f32 and FABSNEG.v2f16 are rewritten in place into the FADD opcode
+ * of the same size: the opcode is replaced, the round mode is set to
+ * BI_ROUND_NONE and the second source is set to bi_negzero(). The first
+ * source is not touched by this function. Instructions with any other
+ * opcode are left unchanged.
+ *
+ * @param I instruction to lower; modified in place
+ */
+
+void
+bi_lower_opt_instruction(bi_instr *I)
+{
+        switch (I->op) {
+        case BI_OPCODE_FABSNEG_F32:
+        case BI_OPCODE_FABSNEG_V2F16:
+                I->op = (bi_opcode_props[I->op].size == BI_SIZE_32) ? /* choose the FADD variant from the pseudo-op's own size */
+                        BI_OPCODE_FADD_F32 : BI_OPCODE_FADD_V2F16;
+
+                I->round = BI_ROUND_NONE;
+                I->src[1] = bi_negzero(); /* NOTE(review): presumably -0.0 so that adding it keeps the sign of a zero input -- confirm */
+                break;
+
+        default:
+                break; /* not a pseudo-op handled here; nothing to lower */
+        }
+}
index fbe59f6..0621ce8 100644 (file)
@@ -1880,11 +1880,11 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
         }
 
         case nir_op_fneg:
-                bi_fadd_to(b, sz, dst, bi_neg(s0), bi_negzero(), BI_ROUND_NONE);
+                bi_fabsneg_to(b, sz, dst, bi_neg(s0));
                 break;
 
         case nir_op_fabs:
-                bi_fadd_to(b, sz, dst, bi_abs(s0), bi_negzero(), BI_ROUND_NONE);
+                bi_fabsneg_to(b, sz, dst, bi_abs(s0));
                 break;
 
         case nir_op_fsin:
@@ -3708,6 +3708,10 @@ bifrost_compile_shader_nir(nir_shader *nir,
                 bi_validate(ctx, "Optimization passes");
         }
 
+        bi_foreach_instr_global(ctx, I) {
+                bi_lower_opt_instruction(I);
+        }
+
         bi_foreach_block(ctx, block) {
                 bi_lower_branch(block);
         }
index 311c99a..72a878f 100644 (file)
@@ -902,6 +902,8 @@ void bi_lower_fau(bi_context *ctx);
 void bi_assign_scoreboard(bi_context *ctx);
 void bi_register_allocate(bi_context *ctx);
 
+void bi_lower_opt_instruction(bi_instr *I);
+
 void bi_schedule(bi_context *ctx);
 bool bi_can_fma(bi_instr *ins);
 bool bi_can_add(bi_instr *ins);