From: Alejandro Piñeiro
Date: Mon, 27 Sep 2021 23:17:08 +0000 (+0200)
Subject: broadcom/qpu: implement switch rules for fmin/fmax fadd/faddnf for v71
X-Git-Tag: upstream/23.3.3~898
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=310dec3ec94922f10801cbbf039d9047358cc0c5;p=platform%2Fupstream%2Fmesa.git

broadcom/qpu: implement switch rules for fmin/fmax fadd/faddnf for v71

They use the same opcodes, and switch between one and the other based on
raddr. Note that the rule also takes into account whether small_imm_a/b
are used. Small immediates support for v71 is not in place yet, so that
part is hardcoded for now and will be updated once it is implemented.

Reviewed-by: Iago Toral Quiroga
Part-of:
---

diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
index 5169de2..ff431be 100644
--- a/src/broadcom/qpu/qpu_pack.c
+++ b/src/broadcom/qpu/qpu_pack.c
@@ -654,7 +654,9 @@ static const struct opcode_desc mul_ops_v33[] = {
  * opcodes that changed on v71
  */
 static const struct opcode_desc add_ops_v71[] = {
+        /* FADD is FADDNF depending on the order of the raddr_a/raddr_b. */
         { 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADD },
+        { 0, 47, .raddr_mask = ANYOPMASK, V3D_QPU_A_FADDNF },
         { 53, 55, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
         { 56, 56, .raddr_mask = ANYOPMASK, V3D_QPU_A_ADD },
         { 57, 59, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
@@ -669,6 +671,10 @@ static const struct opcode_desc add_ops_v71[] = {
         { 125, 125, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHR },
         { 126, 126, .raddr_mask = ANYOPMASK, V3D_QPU_A_ASR },
         { 127, 127, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROR },
+        /* FMIN is instead FMAX depending on the raddr_a/b order. */
+        { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMIN },
+        { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMAX },
+
         { 176, 180, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFMIN },
         { 181, 181, .raddr_mask = ANYOPMASK, V3D_QPU_A_AND },
         { 182, 182, .raddr_mask = ANYOPMASK, V3D_QPU_A_OR },
@@ -1165,6 +1171,22 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst
 
         instr->alu.add.op = desc->op;
 
+        /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
+         * operands.
+         */
+        /* FIXME: for now hardcoded values, until we get the small_imm support
+         * in place
+         */
+        uint32_t small_imm_a = 0;
+        uint32_t small_imm_b = 0;
+        if (small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
+            small_imm_b * 256 + (op & 3) * 64 + raddr_b) {
+                if (instr->alu.add.op == V3D_QPU_A_FMIN)
+                        instr->alu.add.op = V3D_QPU_A_FMAX;
+                if (instr->alu.add.op == V3D_QPU_A_FADD)
+                        instr->alu.add.op = V3D_QPU_A_FADDNF;
+        }
+
         /* Some QPU ops require a bit more than just basic opcode and mux a/b
          * comparisons to distinguish them.
          */
@@ -1757,6 +1779,11 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
                 uint32_t output_pack;
                 uint32_t a_unpack;
                 uint32_t b_unpack;
+                /* FIXME: for now hardcoded values, until we get the small_imm
+                 * support in place
+                 */
+                uint32_t small_imm_a = 0;
+                uint32_t small_imm_b = 0;
 
                 if (instr->alu.add.op != V3D_QPU_A_FCMP) {
                         if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
@@ -1776,6 +1803,27 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
                                 return false;
                         }
 
+                /* These operations with commutative operands are
+                 * distinguished by the order of the operands come in.
+                 */
+                bool ordering =
+                        small_imm_a * 256 + a_unpack * 64 + raddr_a >
+                        small_imm_b * 256 + b_unpack * 64 + raddr_b;
+                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
+                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
+                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
+                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
+                        uint32_t temp;
+
+                        temp = a_unpack;
+                        a_unpack = b_unpack;
+                        b_unpack = temp;
+
+                        temp = raddr_a;
+                        raddr_a = raddr_b;
+                        raddr_b = temp;
+                }
+
                 opcode |= a_unpack << 2;
                 opcode |= b_unpack << 0;
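
To make the switch rule easier to read in isolation, here is a minimal standalone C sketch of the comparison the patch relies on. Only the operand key (small_imm * 256 + unpack * 64 + raddr) and the FMIN->FMAX / FADD->FADDNF swap come from the patch; the enum, the helper names and the example values below are hypothetical and are not part of Mesa.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the V3D_QPU_A_* opcodes used by the driver. */
enum sketch_add_op {
        SKETCH_FADD,
        SKETCH_FADDNF,
        SKETCH_FMIN,
        SKETCH_FMAX,
};

/* Same weighting as the patch: small-immediate flag first, then the
 * unpack bits, then the raddr itself.
 */
static uint32_t
operand_key(uint32_t small_imm, uint32_t unpack, uint32_t raddr)
{
        return small_imm * 256 + unpack * 64 + raddr;
}

/* Mirrors the unpack direction: given the opcode range the instruction
 * decoded into (FMIN or FADD), return the op that the operand order
 * really encodes.
 */
static enum sketch_add_op
resolve_add_op(enum sketch_add_op op,
               uint32_t small_imm_a, uint32_t a_unpack, uint32_t raddr_a,
               uint32_t small_imm_b, uint32_t b_unpack, uint32_t raddr_b)
{
        bool ordering =
                operand_key(small_imm_a, a_unpack, raddr_a) >
                operand_key(small_imm_b, b_unpack, raddr_b);

        if (ordering && op == SKETCH_FMIN)
                return SKETCH_FMAX;
        if (ordering && op == SKETCH_FADD)
                return SKETCH_FADDNF;
        return op;
}

int
main(void)
{
        /* No small immediates, no unpack modifiers: raddr_a > raddr_b, so
         * an instruction that decodes into the FMIN range is really FMAX.
         */
        enum sketch_add_op op = resolve_add_op(SKETCH_FMIN, 0, 0, 5, 0, 0, 3);
        printf("%s\n", op == SKETCH_FMAX ? "FMAX" : "FMIN");

        /* With the operands the other way around, plain FMIN is kept. */
        op = resolve_add_op(SKETCH_FMIN, 0, 0, 3, 0, 0, 5);
        printf("%s\n", op == SKETCH_FMAX ? "FMAX" : "FMIN");
        return 0;
}

Both directions of the patch follow from the same key: the packer swaps the a/b unpack and raddr fields when the requested op needs the "greater first" encoding, and the unpacker recomputes the key to decide whether the opcode range it matched means FMIN/FADD or FMAX/FADDNF.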