From b7edf30191cae945b4836d683762d0ebed6efbfe Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Aug 2014 14:16:19 -0700
Subject: [PATCH] vc4: Add disasm for A-file unpack operations.

The A-file unpack is just like R4 unpack, except that if you don't do a
floating-point operation it won't do float conversion (so int16 gets
scaled up to int32).
---
 src/gallium/drivers/vc4/vc4_qpu_defines.h | 18 +++++++++---------
 src/gallium/drivers/vc4/vc4_qpu_disasm.c  | 28 ++++++++++++++++------------
 src/gallium/drivers/vc4/vc4_qpu_emit.c    |  2 +-
 3 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h
index 0715df2..cee2ae8 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_defines.h
+++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h
@@ -195,15 +195,15 @@ enum qpu_pack_a {
         QPU_PACK_A_8D_SAT,
 };
 
-enum qpu_unpack_r4 {
-        QPU_UNPACK_R4_NOP,
-        QPU_UNPACK_R4_F16A_TO_F32,
-        QPU_UNPACK_R4_F16B_TO_F32,
-        QPU_UNPACK_R4_8D_REP,
-        QPU_UNPACK_R4_8A,
-        QPU_UNPACK_R4_8B,
-        QPU_UNPACK_R4_8C,
-        QPU_UNPACK_R4_8D,
+enum qpu_unpack {
+        QPU_UNPACK_NOP,
+        QPU_UNPACK_F16A_TO_F32,
+        QPU_UNPACK_F16B_TO_F32,
+        QPU_UNPACK_8D_REP,
+        QPU_UNPACK_8A,
+        QPU_UNPACK_8B,
+        QPU_UNPACK_8C,
+        QPU_UNPACK_8D,
 };
 
 #define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
index 5257105..403a18e 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
@@ -93,15 +93,18 @@ static const char *qpu_pack_mul[] = {
         [QPU_PACK_MUL_8D] = "8d",
 };
 
-static const char *qpu_unpack_r4[] = {
-        [QPU_UNPACK_R4_NOP] = "",
-        [QPU_UNPACK_R4_F16A_TO_F32] = "f16a",
-        [QPU_UNPACK_R4_F16B_TO_F32] = "f16b",
-        [QPU_UNPACK_R4_8D_REP] = "8d_rep",
-        [QPU_UNPACK_R4_8A] = "8a",
-        [QPU_UNPACK_R4_8B] = "8b",
-        [QPU_UNPACK_R4_8C] = "8c",
-        [QPU_UNPACK_R4_8D] = "8d",
+/* The A-file and R4 unpack modes share this one table; the only difference is
+ * that the R4 variants always convert to float, with no integer support.
+ */
+static const char *qpu_unpack[] = {
+        [QPU_UNPACK_NOP] = "",
+        [QPU_UNPACK_F16A_TO_F32] = "f16a",
+        [QPU_UNPACK_F16B_TO_F32] = "f16b",
+        [QPU_UNPACK_8D_REP] = "8d_rep",
+        [QPU_UNPACK_8A] = "8a",
+        [QPU_UNPACK_8B] = "8b",
+        [QPU_UNPACK_8C] = "8c",
+        [QPU_UNPACK_8D] = "8d",
 };
 
 static const char *special_read_a[] = {
@@ -300,9 +303,10 @@ print_alu_src(uint64_t inst, uint32_t mux)
                         fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
         }
 
-        if (mux == QPU_MUX_R4 && (inst & QPU_PM) &&
-            unpack != QPU_UNPACK_R4_NOP) {
-                fprintf(stderr, ".%s", DESC(qpu_unpack_r4, unpack));
+        if (unpack != QPU_UNPACK_NOP &&
+            ((mux == QPU_MUX_A && !(inst & QPU_PM)) ||
+             (mux == QPU_MUX_R4 && (inst & QPU_PM)))) {
+                fprintf(stderr, ".%s", DESC(qpu_unpack, unpack));
         }
 }
 
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 9a5dfa4..3f30f2c 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -483,7 +483,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                         assert(src[0].mux == QPU_MUX_R4);
                         queue(c, qpu_a_MOV(dst, src[0]));
                         *last_inst(c) |= QPU_PM;
-                        *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_R4_8A +
+                        *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
                                                        (qinst->op -
                                                         QOP_R4_UNPACK_A),
                                                        QPU_UNPACK);
-- 
2.7.4