freedreno/afuc: Re-indent
author Rob Clark <robdclark@chromium.org>
Fri, 16 Apr 2021 17:49:48 +0000 (10:49 -0700)
committer Marge Bot <eric+marge@anholt.net>
Sat, 17 Apr 2021 15:38:56 +0000 (15:38 +0000)
clang-format -fallback-style=none --style=file -i src/freedreno/afuc/*.[ch]

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10293>

src/freedreno/afuc/afuc.h
src/freedreno/afuc/asm.c
src/freedreno/afuc/asm.h
src/freedreno/afuc/disasm.c

index 89b79ef..a423119 100644 (file)
@@ -41,154 +41,152 @@ RB.
  * (ie. top two bits are '11' are encoded as 6 bits.  See get_opc()
  */
 typedef enum {
-       OPC_NOP    = 0x00,
-
-       OPC_ADD    = 0x01,  /* add immediate */
-       OPC_ADDHI  = 0x02,  /* add immediate (hi 32b of 64b) */
-       OPC_SUB    = 0x03,  /* subtract immediate */
-       OPC_SUBHI  = 0x04,  /* subtract immediate (hi 32b of 64b) */
-       OPC_AND    = 0x05,  /* AND immediate */
-       OPC_OR     = 0x06,  /* OR immediate */
-       OPC_XOR    = 0x07,  /* XOR immediate */
-       OPC_NOT    = 0x08,  /* bitwise not of immed (src1 ignored) */
-       OPC_SHL    = 0x09,  /* shift-left immediate */
-       OPC_USHR   = 0x0a,  /* unsigned shift right by immediate */
-       OPC_ISHR   = 0x0b,  /* signed shift right by immediate */
-       OPC_ROT    = 0x0c,  /* rotate left (left shift with wrap-around) */
-       OPC_MUL8   = 0x0d,  /* 8bit multiply by immediate */
-       OPC_MIN    = 0x0e,
-       OPC_MAX    = 0x0f,
-       OPC_CMP    = 0x10,  /* compare src to immed */
-       OPC_MOVI   = 0x11,  /* move immediate */
-
-       /* Return the most-significant bit of src2, or 0 if src2 == 0 (the
-        * same as if src2 == 1). src1 is ignored. Note that this overlaps
-        * with STORE6, so it can only be used with the two-source encoding.
-        */
-       OPC_MSB    = 0x14,
-
-
-       OPC_ALU    = 0x13,  /* ALU instruction with two src registers */
-
-       /* These seem something to do with setting some external state..
-        * doesn't seem to map *directly* to registers, but I guess that
-        * is where things end up.  For example, this sequence in the
-        * CP_INDIRECT_BUFFER handler:
-        *
-        *     mov $02, $data   ; low 32b of IB target address
-        *     mov $03, $data   ; high 32b of IB target
-        *     mov $04, $data   ; IB size in dwords
-        *     breq $04, 0x0, #l23 (#69, 04a2)
-        *     and $05, $18, 0x0003
-        *     shl $05, $05, 0x0002
-        *     cwrite $02, [$05 + 0x0b0], 0x8
-        *     cwrite $03, [$05 + 0x0b1], 0x8
-        *     cwrite $04, [$05 + 0x0b2], 0x8
-        *
-        * Note that CP_IB1/2_BASE_LO/HI/BUFSZ in 0x0b1f->0xb21 (IB1) and
-        * 0x0b22->0x0b24 (IB2).  Presumably $05 ends up w/ different value
-        * for RB->IB1 vs IB1->IB2.
-        */
-       OPC_CWRITE5 = 0x15,
-       OPC_CREAD5  = 0x16,
-
-       /* A6xx shuffled around the cwrite/cread opcodes and added new opcodes
-        * that let you read/write directly to memory (and bypass the IOMMU?).
-        */
-       OPC_STORE6  = 0x14,
-       OPC_CWRITE6 = 0x15,
-       OPC_LOAD6   = 0x16,
-       OPC_CREAD6  = 0x17,
-
-       OPC_BRNEI  = 0x30,  /* relative branch (if $src != immed) */
-       OPC_BREQI  = 0x31,  /* relative branch (if $src == immed) */
-       OPC_BRNEB  = 0x32,  /* relative branch (if bit not set) */
-       OPC_BREQB  = 0x33,  /* relative branch (if bit is set) */
-       OPC_RET    = 0x34,  /* return */
-       OPC_CALL   = 0x35,  /* "function" call */
-       OPC_WIN    = 0x36,  /* wait for input (ie. wait for WPTR to advance) */
-       OPC_PREEMPTLEAVE6 = 0x38,  /* try to leave preemption */
-       OPC_SETSECURE = 0x3b, /* switch secure mode on/off */
+   OPC_NOP = 0x00,
+
+   OPC_ADD = 0x01,   /* add immediate */
+   OPC_ADDHI = 0x02, /* add immediate (hi 32b of 64b) */
+   OPC_SUB = 0x03,   /* subtract immediate */
+   OPC_SUBHI = 0x04, /* subtract immediate (hi 32b of 64b) */
+   OPC_AND = 0x05,   /* AND immediate */
+   OPC_OR = 0x06,    /* OR immediate */
+   OPC_XOR = 0x07,   /* XOR immediate */
+   OPC_NOT = 0x08,   /* bitwise not of immed (src1 ignored) */
+   OPC_SHL = 0x09,   /* shift-left immediate */
+   OPC_USHR = 0x0a,  /* unsigned shift right by immediate */
+   OPC_ISHR = 0x0b,  /* signed shift right by immediate */
+   OPC_ROT = 0x0c,   /* rotate left (left shift with wrap-around) */
+   OPC_MUL8 = 0x0d,  /* 8bit multiply by immediate */
+   OPC_MIN = 0x0e,
+   OPC_MAX = 0x0f,
+   OPC_CMP = 0x10,  /* compare src to immed */
+   OPC_MOVI = 0x11, /* move immediate */
+
+   /* Return the most-significant bit of src2, or 0 if src2 == 0 (the
+    * same as if src2 == 1). src1 is ignored. Note that this overlaps
+    * with STORE6, so it can only be used with the two-source encoding.
+    */
+   OPC_MSB = 0x14,
+
+   OPC_ALU = 0x13, /* ALU instruction with two src registers */
+
+   /* These seem something to do with setting some external state..
+    * doesn't seem to map *directly* to registers, but I guess that
+    * is where things end up.  For example, this sequence in the
+    * CP_INDIRECT_BUFFER handler:
+    *
+    *     mov $02, $data   ; low 32b of IB target address
+    *     mov $03, $data   ; high 32b of IB target
+    *     mov $04, $data   ; IB size in dwords
+    *     breq $04, 0x0, #l23 (#69, 04a2)
+    *     and $05, $18, 0x0003
+    *     shl $05, $05, 0x0002
+    *     cwrite $02, [$05 + 0x0b0], 0x8
+    *     cwrite $03, [$05 + 0x0b1], 0x8
+    *     cwrite $04, [$05 + 0x0b2], 0x8
+    *
+    * Note that CP_IB1/2_BASE_LO/HI/BUFSZ in 0x0b1f->0xb21 (IB1) and
+    * 0x0b22->0x0b24 (IB2).  Presumably $05 ends up w/ different value
+    * for RB->IB1 vs IB1->IB2.
+    */
+   OPC_CWRITE5 = 0x15,
+   OPC_CREAD5 = 0x16,
+
+   /* A6xx shuffled around the cwrite/cread opcodes and added new opcodes
+    * that let you read/write directly to memory (and bypass the IOMMU?).
+    */
+   OPC_STORE6 = 0x14,
+   OPC_CWRITE6 = 0x15,
+   OPC_LOAD6 = 0x16,
+   OPC_CREAD6 = 0x17,
+
+   OPC_BRNEI = 0x30,         /* relative branch (if $src != immed) */
+   OPC_BREQI = 0x31,         /* relative branch (if $src == immed) */
+   OPC_BRNEB = 0x32,         /* relative branch (if bit not set) */
+   OPC_BREQB = 0x33,         /* relative branch (if bit is set) */
+   OPC_RET = 0x34,           /* return */
+   OPC_CALL = 0x35,          /* "function" call */
+   OPC_WIN = 0x36,           /* wait for input (ie. wait for WPTR to advance) */
+   OPC_PREEMPTLEAVE6 = 0x38, /* try to leave preemption */
+   OPC_SETSECURE = 0x3b,     /* switch secure mode on/off */
 } afuc_opc;
 
-
 typedef union PACKED {
-       /* addi, subi, andi, ori, xori, etc: */
-       struct PACKED {
-               uint32_t uimm    : 16;
-               uint32_t dst     : 5;
-               uint32_t src     : 5;
-               uint32_t hdr     : 6;
-       } alui;
-       struct PACKED {
-               uint32_t uimm    : 16;
-               uint32_t dst     : 5;
-               uint32_t shift   : 5;
-               uint32_t hdr     : 6;
-       } movi;
-       struct PACKED {
-               uint32_t alu     : 5;
-               uint32_t pad     : 4;
-               uint32_t xmov    : 2; /* execute eXtra mov's based on $rem */
-               uint32_t dst     : 5;
-               uint32_t src2    : 5;
-               uint32_t src1    : 5;
-               uint32_t hdr     : 6;
-       } alu;
-       struct PACKED {
-               uint32_t uimm    : 12;
-               uint32_t flags   : 4;
-               uint32_t src1    : 5;     /* dst (cread) or src (cwrite) register */
-               uint32_t src2    : 5;     /* read or write address is src2+uimm */
-               uint32_t hdr     : 6;
-       } control;
-       struct PACKED {
-               int32_t  ioff    : 16;    /* relative offset */
-               uint32_t bit_or_imm : 5;
-               uint32_t src     : 5;
-               uint32_t hdr     : 6;
-       } br;
-       struct PACKED {
-               uint32_t uoff    : 26;    /* absolute (unsigned) offset */
-               uint32_t hdr     : 6;
-       } call;
-       struct PACKED {
-               uint32_t pad       : 25;
-               uint32_t interrupt : 1; /* return from ctxt-switch interrupt handler */
-               uint32_t hdr       : 6;
-       } ret;
-       struct PACKED {
-               uint32_t pad     : 26;
-               uint32_t hdr     : 6;
-       } waitin;
-       struct PACKED {
-               uint32_t pad     : 26;
-               uint32_t opc_r   : 6;
-       };
+   /* addi, subi, andi, ori, xori, etc: */
+   struct PACKED {
+      uint32_t uimm : 16;
+      uint32_t dst : 5;
+      uint32_t src : 5;
+      uint32_t hdr : 6;
+   } alui;
+   struct PACKED {
+      uint32_t uimm : 16;
+      uint32_t dst : 5;
+      uint32_t shift : 5;
+      uint32_t hdr : 6;
+   } movi;
+   struct PACKED {
+      uint32_t alu : 5;
+      uint32_t pad : 4;
+      uint32_t xmov : 2; /* execute eXtra mov's based on $rem */
+      uint32_t dst : 5;
+      uint32_t src2 : 5;
+      uint32_t src1 : 5;
+      uint32_t hdr : 6;
+   } alu;
+   struct PACKED {
+      uint32_t uimm : 12;
+      uint32_t flags : 4;
+      uint32_t src1 : 5; /* dst (cread) or src (cwrite) register */
+      uint32_t src2 : 5; /* read or write address is src2+uimm */
+      uint32_t hdr : 6;
+   } control;
+   struct PACKED {
+      int32_t ioff : 16; /* relative offset */
+      uint32_t bit_or_imm : 5;
+      uint32_t src : 5;
+      uint32_t hdr : 6;
+   } br;
+   struct PACKED {
+      uint32_t uoff : 26; /* absolute (unsigned) offset */
+      uint32_t hdr : 6;
+   } call;
+   struct PACKED {
+      uint32_t pad : 25;
+      uint32_t interrupt : 1; /* return from ctxt-switch interrupt handler */
+      uint32_t hdr : 6;
+   } ret;
+   struct PACKED {
+      uint32_t pad : 26;
+      uint32_t hdr : 6;
+   } waitin;
+   struct PACKED {
+      uint32_t pad : 26;
+      uint32_t opc_r : 6;
+   };
 
 } afuc_instr;
 
 static inline void
 afuc_get_opc(afuc_instr *ai, afuc_opc *opc, bool *rep)
 {
-       if (ai->opc_r < 0x30) {
-               *opc = ai->opc_r >> 1;
-               *rep = ai->opc_r & 0x1;
-       } else {
-               *opc = ai->opc_r;
-               *rep = false;
-       }
+   if (ai->opc_r < 0x30) {
+      *opc = ai->opc_r >> 1;
+      *rep = ai->opc_r & 0x1;
+   } else {
+      *opc = ai->opc_r;
+      *rep = false;
+   }
 }
 
 static inline void
 afuc_set_opc(afuc_instr *ai, afuc_opc opc, bool rep)
 {
-       if (opc < 0x30) {
-               ai->opc_r = opc << 1;
-               ai->opc_r |= !!rep;
-       } else {
-               ai->opc_r = opc;
-       }
+   if (opc < 0x30) {
+      ai->opc_r = opc << 1;
+      ai->opc_r |= !!rep;
+   } else {
+      ai->opc_r = opc;
+   }
 }
 
 #endif /* _AFUC_H_ */
index 7b302e4..c90a91e 100644 (file)
  * SOFTWARE.
  */
 
+#include <assert.h>
 #include <err.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <unistd.h>
 #include <fcntl.h>
+#include <getopt.h>
 #include <stdarg.h>
-#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
-#include <assert.h>
-#include <getopt.h>
+#include <unistd.h>
 
 #include "util/macros.h"
 #include "afuc.h"
+#include "asm.h"
+#include "parser.h"
 #include "rnn.h"
 #include "rnndec.h"
-#include "parser.h"
-#include "asm.h"
 
 int gpuver;
 
-
 static struct rnndeccontext *ctx;
 static struct rnndb *db;
 static struct rnndomain *control_regs;
 struct rnndomain *dom[2];
 
-
 /* bit lame to hard-code max but fw sizes are small */
 static struct asm_instruction instructions[0x2000];
 static unsigned num_instructions;
@@ -56,410 +54,437 @@ static unsigned num_instructions;
 static struct asm_label labels[0x512];
 static unsigned num_labels;
 
-struct asm_instruction *next_instr(int tok)
+struct asm_instruction *
+next_instr(int tok)
 {
-       struct asm_instruction *ai = &instructions[num_instructions++];
-       assert(num_instructions < ARRAY_SIZE(instructions));
-       ai->tok = tok;
-       return ai;
+   struct asm_instruction *ai = &instructions[num_instructions++];
+   assert(num_instructions < ARRAY_SIZE(instructions));
+   ai->tok = tok;
+   return ai;
 }
 
-void decl_label(const char *str)
+void
+decl_label(const char *str)
 {
-       struct asm_label *label = &labels[num_labels++];
+   struct asm_label *label = &labels[num_labels++];
 
-       assert(num_labels < ARRAY_SIZE(labels));
+   assert(num_labels < ARRAY_SIZE(labels));
 
-       label->offset = num_instructions;
-       label->label = str;
+   label->offset = num_instructions;
+   label->label = str;
 }
 
-static int resolve_label(const char *str)
+static int
+resolve_label(const char *str)
 {
-       int i;
+   int i;
 
-       for (i = 0; i < num_labels; i++) {
-               struct asm_label *label = &labels[i];
+   for (i = 0; i < num_labels; i++) {
+      struct asm_label *label = &labels[i];
 
-               if (!strcmp(str, label->label)) {
-                       return label->offset;
-               }
-       }
+      if (!strcmp(str, label->label)) {
+         return label->offset;
+      }
+   }
 
-       fprintf(stderr, "Undeclared label: %s\n", str);
-       exit(2);
+   fprintf(stderr, "Undeclared label: %s\n", str);
+   exit(2);
 }
 
-static afuc_opc tok2alu(int tok)
+static afuc_opc
+tok2alu(int tok)
 {
-       switch (tok) {
-       case T_OP_ADD:   return OPC_ADD;
-       case T_OP_ADDHI: return OPC_ADDHI;
-       case T_OP_SUB:   return OPC_SUB;
-       case T_OP_SUBHI: return OPC_SUBHI;
-       case T_OP_AND:   return OPC_AND;
-       case T_OP_OR:    return OPC_OR;
-       case T_OP_XOR:   return OPC_XOR;
-       case T_OP_NOT:   return OPC_NOT;
-       case T_OP_SHL:   return OPC_SHL;
-       case T_OP_USHR:  return OPC_USHR;
-       case T_OP_ISHR:  return OPC_ISHR;
-       case T_OP_ROT:   return OPC_ROT;
-       case T_OP_MUL8:  return OPC_MUL8;
-       case T_OP_MIN:   return OPC_MIN;
-       case T_OP_MAX:   return OPC_MAX;
-       case T_OP_CMP:   return OPC_CMP;
-       case T_OP_MSB:   return OPC_MSB;
-       default:
-               assert(0);
-               return -1;
-       }
+   switch (tok) {
+   case T_OP_ADD:
+      return OPC_ADD;
+   case T_OP_ADDHI:
+      return OPC_ADDHI;
+   case T_OP_SUB:
+      return OPC_SUB;
+   case T_OP_SUBHI:
+      return OPC_SUBHI;
+   case T_OP_AND:
+      return OPC_AND;
+   case T_OP_OR:
+      return OPC_OR;
+   case T_OP_XOR:
+      return OPC_XOR;
+   case T_OP_NOT:
+      return OPC_NOT;
+   case T_OP_SHL:
+      return OPC_SHL;
+   case T_OP_USHR:
+      return OPC_USHR;
+   case T_OP_ISHR:
+      return OPC_ISHR;
+   case T_OP_ROT:
+      return OPC_ROT;
+   case T_OP_MUL8:
+      return OPC_MUL8;
+   case T_OP_MIN:
+      return OPC_MIN;
+   case T_OP_MAX:
+      return OPC_MAX;
+   case T_OP_CMP:
+      return OPC_CMP;
+   case T_OP_MSB:
+      return OPC_MSB;
+   default:
+      assert(0);
+      return -1;
+   }
 }
 
-static void emit_instructions(int outfd)
+static void
+emit_instructions(int outfd)
 {
-       int i;
-
-       /* there is an extra 0x00000000 which kernel strips off.. we could
-        * perhaps use it for versioning.
-        */
-       i = 0;
-       write(outfd, &i, 4);
-
-       for (i = 0; i < num_instructions; i++) {
-               struct asm_instruction *ai = &instructions[i];
-               afuc_instr instr = {0};
-               afuc_opc opc;
-
-               /* special case, 2nd dword is patched up w/ # of instructions
-                * (ie. offset of jmptbl)
-                */
-               if (i == 1) {
-                       assert(ai->is_literal);
-                       ai->literal &= ~0xffff;
-                       ai->literal |= num_instructions;
-               }
-
-               if (ai->is_literal) {
-                       write(outfd, &ai->literal, 4);
-                       continue;
-               }
-
-               switch (ai->tok) {
-               case T_OP_NOP:
-                       opc = OPC_NOP;
-                       if (gpuver >= 6)
-                               instr.pad = 0x1000000;
-                       break;
-               case T_OP_ADD:
-               case T_OP_ADDHI:
-               case T_OP_SUB:
-               case T_OP_SUBHI:
-               case T_OP_AND:
-               case T_OP_OR:
-               case T_OP_XOR:
-               case T_OP_NOT:
-               case T_OP_SHL:
-               case T_OP_USHR:
-               case T_OP_ISHR:
-               case T_OP_ROT:
-               case T_OP_MUL8:
-               case T_OP_MIN:
-               case T_OP_MAX:
-               case T_OP_CMP:
-               case T_OP_MSB:
-                       if (ai->has_immed) {
-                               /* MSB overlaps with STORE */
-                               assert(ai->tok != T_OP_MSB);
-                               if (ai->xmov) {
-                                       fprintf(stderr, "ALU instruction cannot have immediate and xmov\n");
-                                       exit(1);
-                               }
-                               opc = tok2alu(ai->tok);
-                               instr.alui.dst = ai->dst;
-                               instr.alui.src = ai->src1;
-                               instr.alui.uimm = ai->immed;
-                       } else {
-                               opc = OPC_ALU;
-                               instr.alu.dst  = ai->dst;
-                               instr.alu.src1 = ai->src1;
-                               instr.alu.src2 = ai->src2;
-                               instr.alu.xmov = ai->xmov;
-                               instr.alu.alu = tok2alu(ai->tok);
-                       }
-                       break;
-               case T_OP_MOV:
-                       /* move can either be encoded as movi (ie. move w/ immed) or
-                        * an alu instruction
-                        */
-                       if ((ai->has_immed || ai->label) && ai->xmov) {
-                               fprintf(stderr, "ALU instruction cannot have immediate and xmov\n");
-                               exit(1);
-                       }
-                       if (ai->has_immed) {
-                               opc = OPC_MOVI;
-                               instr.movi.dst = ai->dst;
-                               instr.movi.uimm = ai->immed;
-                               instr.movi.shift = ai->shift;
-                       } else if (ai->label) {
-                               /* mov w/ a label is just an alias for an immediate, this
-                                * is useful to load the address of a constant table into
-                                * a register:
-                                */
-                               opc = OPC_MOVI;
-                               instr.movi.dst = ai->dst;
-                               instr.movi.uimm = resolve_label(ai->label);
-                               instr.movi.shift = ai->shift;
-                       } else {
-                               /* encode as: or $dst, $00, $src */
-                               opc = OPC_ALU;
-                               instr.alu.dst  = ai->dst;
-                               instr.alu.src1 = 0x00;      /* $00 reads-back 0 */
-                               instr.alu.src2 = ai->src1;
-                               instr.alu.xmov = ai->xmov;
-                               instr.alu.alu = OPC_OR;
-                       }
-                       break;
-               case T_OP_CWRITE:
-               case T_OP_CREAD:
-               case T_OP_STORE:
-               case T_OP_LOAD:
-                       if (gpuver >= 6) {
-                               if (ai->tok == T_OP_CWRITE) {
-                                       opc = OPC_CWRITE6;
-                               } else if (ai->tok == T_OP_CREAD) {
-                                       opc = OPC_CREAD6;
-                               } else if (ai->tok == T_OP_STORE) {
-                                       opc = OPC_STORE6;
-                               } else if (ai->tok == T_OP_LOAD) {
-                                       opc = OPC_LOAD6;
-                               }
-                       } else {
-                               if (ai->tok == T_OP_CWRITE) {
-                                       opc = OPC_CWRITE5;
-                               } else if (ai->tok == T_OP_CREAD) {
-                                       opc = OPC_CREAD5;
-                               } else if (ai->tok == T_OP_STORE ||
-                                          ai->tok == T_OP_LOAD) {
-                                       fprintf(stderr, "load and store do not exist on a5xx\n");
-                                       exit(1);
-                               }
-                       }
-                       instr.control.src1 = ai->src1;
-                       instr.control.src2 = ai->src2;
-                       instr.control.flags = ai->bit;
-                       instr.control.uimm = ai->immed;
-                       break;
-               case T_OP_BRNE:
-               case T_OP_BREQ:
-                       if (ai->has_immed) {
-                               opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEI : OPC_BREQI;
-                               instr.br.bit_or_imm = ai->immed;
-                       } else {
-                               opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEB : OPC_BREQB;
-                               instr.br.bit_or_imm = ai->bit;
-                       }
-                       instr.br.src = ai->src1;
-                       instr.br.ioff = resolve_label(ai->label) - i;
-                       break;
-               case T_OP_RET:
-                       opc = OPC_RET;
-                       break;
-               case T_OP_IRET:
-                       opc = OPC_RET;
-                       instr.ret.interrupt = 1;
-                       break;
-               case T_OP_CALL:
-                       opc = OPC_CALL;
-                       instr.call.uoff = resolve_label(ai->label);
-                       break;
-               case T_OP_PREEMPTLEAVE:
-                       opc = OPC_PREEMPTLEAVE6;
-                       instr.call.uoff = resolve_label(ai->label);
-                       break;
-               case T_OP_SETSECURE:
-                       opc = OPC_SETSECURE;
-                       if (resolve_label(ai->label) != i + 3) {
-                               fprintf(stderr, "jump label %s is incorrect for setsecure\n", ai->label);
-                               exit(1);
-                       }
-                       if (ai->src1 != 0x2) {
-                               fprintf(stderr, "source for setsecure must be $02\n");
-                               exit(1);
-                       }
-                       break;
-               case T_OP_JUMP:
-                       /* encode jump as: brne $00, b0, #label */
-                       opc = OPC_BRNEB;
-                       instr.br.bit_or_imm = 0;
-                       instr.br.src = 0x00;       /* $00 reads-back 0.. compare to 0 */
-                       instr.br.ioff = resolve_label(ai->label) - i;
-                       break;
-               case T_OP_WAITIN:
-                       opc = OPC_WIN;
-                       break;
-               default:
-                       unreachable("");
-               }
-
-               afuc_set_opc(&instr, opc, ai->rep);
-
-               write(outfd, &instr, 4);
-       }
-
+   int i;
+
+   /* there is an extra 0x00000000 which kernel strips off.. we could
+    * perhaps use it for versioning.
+    */
+   i = 0;
+   write(outfd, &i, 4);
+
+   for (i = 0; i < num_instructions; i++) {
+      struct asm_instruction *ai = &instructions[i];
+      afuc_instr instr = {0};
+      afuc_opc opc;
+
+      /* special case, 2nd dword is patched up w/ # of instructions
+       * (ie. offset of jmptbl)
+       */
+      if (i == 1) {
+         assert(ai->is_literal);
+         ai->literal &= ~0xffff;
+         ai->literal |= num_instructions;
+      }
+
+      if (ai->is_literal) {
+         write(outfd, &ai->literal, 4);
+         continue;
+      }
+
+      switch (ai->tok) {
+      case T_OP_NOP:
+         opc = OPC_NOP;
+         if (gpuver >= 6)
+            instr.pad = 0x1000000;
+         break;
+      case T_OP_ADD:
+      case T_OP_ADDHI:
+      case T_OP_SUB:
+      case T_OP_SUBHI:
+      case T_OP_AND:
+      case T_OP_OR:
+      case T_OP_XOR:
+      case T_OP_NOT:
+      case T_OP_SHL:
+      case T_OP_USHR:
+      case T_OP_ISHR:
+      case T_OP_ROT:
+      case T_OP_MUL8:
+      case T_OP_MIN:
+      case T_OP_MAX:
+      case T_OP_CMP:
+      case T_OP_MSB:
+         if (ai->has_immed) {
+            /* MSB overlaps with STORE */
+            assert(ai->tok != T_OP_MSB);
+            if (ai->xmov) {
+               fprintf(stderr,
+                       "ALU instruction cannot have immediate and xmov\n");
+               exit(1);
+            }
+            opc = tok2alu(ai->tok);
+            instr.alui.dst = ai->dst;
+            instr.alui.src = ai->src1;
+            instr.alui.uimm = ai->immed;
+         } else {
+            opc = OPC_ALU;
+            instr.alu.dst = ai->dst;
+            instr.alu.src1 = ai->src1;
+            instr.alu.src2 = ai->src2;
+            instr.alu.xmov = ai->xmov;
+            instr.alu.alu = tok2alu(ai->tok);
+         }
+         break;
+      case T_OP_MOV:
+         /* move can either be encoded as movi (ie. move w/ immed) or
+          * an alu instruction
+          */
+         if ((ai->has_immed || ai->label) && ai->xmov) {
+            fprintf(stderr, "ALU instruction cannot have immediate and xmov\n");
+            exit(1);
+         }
+         if (ai->has_immed) {
+            opc = OPC_MOVI;
+            instr.movi.dst = ai->dst;
+            instr.movi.uimm = ai->immed;
+            instr.movi.shift = ai->shift;
+         } else if (ai->label) {
+            /* mov w/ a label is just an alias for an immediate, this
+             * is useful to load the address of a constant table into
+             * a register:
+             */
+            opc = OPC_MOVI;
+            instr.movi.dst = ai->dst;
+            instr.movi.uimm = resolve_label(ai->label);
+            instr.movi.shift = ai->shift;
+         } else {
+            /* encode as: or $dst, $00, $src */
+            opc = OPC_ALU;
+            instr.alu.dst = ai->dst;
+            instr.alu.src1 = 0x00; /* $00 reads-back 0 */
+            instr.alu.src2 = ai->src1;
+            instr.alu.xmov = ai->xmov;
+            instr.alu.alu = OPC_OR;
+         }
+         break;
+      case T_OP_CWRITE:
+      case T_OP_CREAD:
+      case T_OP_STORE:
+      case T_OP_LOAD:
+         if (gpuver >= 6) {
+            if (ai->tok == T_OP_CWRITE) {
+               opc = OPC_CWRITE6;
+            } else if (ai->tok == T_OP_CREAD) {
+               opc = OPC_CREAD6;
+            } else if (ai->tok == T_OP_STORE) {
+               opc = OPC_STORE6;
+            } else if (ai->tok == T_OP_LOAD) {
+               opc = OPC_LOAD6;
+            }
+         } else {
+            if (ai->tok == T_OP_CWRITE) {
+               opc = OPC_CWRITE5;
+            } else if (ai->tok == T_OP_CREAD) {
+               opc = OPC_CREAD5;
+            } else if (ai->tok == T_OP_STORE || ai->tok == T_OP_LOAD) {
+               fprintf(stderr, "load and store do not exist on a5xx\n");
+               exit(1);
+            }
+         }
+         instr.control.src1 = ai->src1;
+         instr.control.src2 = ai->src2;
+         instr.control.flags = ai->bit;
+         instr.control.uimm = ai->immed;
+         break;
+      case T_OP_BRNE:
+      case T_OP_BREQ:
+         if (ai->has_immed) {
+            opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEI : OPC_BREQI;
+            instr.br.bit_or_imm = ai->immed;
+         } else {
+            opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEB : OPC_BREQB;
+            instr.br.bit_or_imm = ai->bit;
+         }
+         instr.br.src = ai->src1;
+         instr.br.ioff = resolve_label(ai->label) - i;
+         break;
+      case T_OP_RET:
+         opc = OPC_RET;
+         break;
+      case T_OP_IRET:
+         opc = OPC_RET;
+         instr.ret.interrupt = 1;
+         break;
+      case T_OP_CALL:
+         opc = OPC_CALL;
+         instr.call.uoff = resolve_label(ai->label);
+         break;
+      case T_OP_PREEMPTLEAVE:
+         opc = OPC_PREEMPTLEAVE6;
+         instr.call.uoff = resolve_label(ai->label);
+         break;
+      case T_OP_SETSECURE:
+         opc = OPC_SETSECURE;
+         if (resolve_label(ai->label) != i + 3) {
+            fprintf(stderr, "jump label %s is incorrect for setsecure\n",
+                    ai->label);
+            exit(1);
+         }
+         if (ai->src1 != 0x2) {
+            fprintf(stderr, "source for setsecure must be $02\n");
+            exit(1);
+         }
+         break;
+      case T_OP_JUMP:
+         /* encode jump as: brne $00, b0, #label */
+         opc = OPC_BRNEB;
+         instr.br.bit_or_imm = 0;
+         instr.br.src = 0x00; /* $00 reads-back 0.. compare to 0 */
+         instr.br.ioff = resolve_label(ai->label) - i;
+         break;
+      case T_OP_WAITIN:
+         opc = OPC_WIN;
+         break;
+      default:
+         unreachable("");
+      }
+
+      afuc_set_opc(&instr, opc, ai->rep);
+
+      write(outfd, &instr, 4);
+   }
 }
 
-static int find_enum_val(struct rnnenum *en, const char *name)
+static int
+find_enum_val(struct rnnenum *en, const char *name)
 {
-       int i;
+   int i;
 
-       for (i = 0; i < en->valsnum; i++)
-               if (en->vals[i]->valvalid && !strcmp(name, en->vals[i]->name))
-                       return en->vals[i]->value;
+   for (i = 0; i < en->valsnum; i++)
+      if (en->vals[i]->valvalid && !strcmp(name, en->vals[i]->name))
+         return en->vals[i]->value;
 
-       return -1;
+   return -1;
 }
 
-static int find_reg(struct rnndomain *dom, const char *name)
+static int
+find_reg(struct rnndomain *dom, const char *name)
 {
-       int i;
+   int i;
 
-       for (i = 0; i < dom->subelemsnum; i++)
-               if (!strcmp(name, dom->subelems[i]->name))
-                       return dom->subelems[i]->offset;
+   for (i = 0; i < dom->subelemsnum; i++)
+      if (!strcmp(name, dom->subelems[i]->name))
+         return dom->subelems[i]->offset;
 
-       return -1;
+   return -1;
 }
 
-unsigned parse_control_reg(const char *name)
+unsigned
+parse_control_reg(const char *name)
 {
-       /* skip leading "@" */
-       int val = find_reg(control_regs, name + 1);
-       if (val < 0) {
-               printf("invalid control reg: %s\n", name);
-               exit(2);
-       }
-       return (unsigned)val;
+   /* skip leading "@" */
+   int val = find_reg(control_regs, name + 1);
+   if (val < 0) {
+      printf("invalid control reg: %s\n", name);
+      exit(2);
+   }
+   return (unsigned)val;
 }
 
-static void emit_jumptable(int outfd)
+static void
+emit_jumptable(int outfd)
 {
-       struct rnnenum *en = rnn_findenum(ctx->db, "adreno_pm4_type3_packets");
-       uint32_t jmptable[0x80] = {0};
-       int i;
-
-       for (i = 0; i < num_labels; i++) {
-               struct asm_label *label = &labels[i];
-               int id = find_enum_val(en, label->label);
-
-               /* if it doesn't match a known PM4 packet-id, try to match UNKN%d: */
-               if (id < 0) {
-                       if (sscanf(label->label, "UNKN%d", &id) != 1) {
-                               /* if still not found, must not belong in jump-table: */
-                               continue;
-                       }
-               }
-
-               jmptable[id] = label->offset;
-       }
-
-       write(outfd, jmptable, sizeof(jmptable));
+   struct rnnenum *en = rnn_findenum(ctx->db, "adreno_pm4_type3_packets");
+   uint32_t jmptable[0x80] = {0};
+   int i;
+
+   for (i = 0; i < num_labels; i++) {
+      struct asm_label *label = &labels[i];
+      int id = find_enum_val(en, label->label);
+
+      /* if it doesn't match a known PM4 packet-id, try to match UNKN%d: */
+      if (id < 0) {
+         if (sscanf(label->label, "UNKN%d", &id) != 1) {
+            /* if still not found, must not belong in jump-table: */
+            continue;
+         }
+      }
+
+      jmptable[id] = label->offset;
+   }
+
+   write(outfd, jmptable, sizeof(jmptable));
 }
 
-static void usage(void)
+static void
+usage(void)
 {
-       fprintf(stderr, "Usage:\n"
-                       "\tasm [-g GPUVER] filename.asm filename.fw\n"
-                       "\t\t-g - specify GPU version (5, etc)\n"
-               );
-       exit(2);
+   fprintf(stderr, "Usage:\n"
+                   "\tasm [-g GPUVER] filename.asm filename.fw\n"
+                   "\t\t-g - specify GPU version (5, etc)\n");
+   exit(2);
 }
 
-int main(int argc, char **argv)
+int
+main(int argc, char **argv)
 {
-       FILE *in;
-       char *file, *outfile, *name, *control_reg_name;
-       int c, ret, outfd;
-
-       /* Argument parsing: */
-       while ((c = getopt (argc, argv, "g:")) != -1) {
-               switch (c) {
-                       case 'g':
-                               gpuver = atoi(optarg);
-                               break;
-                       default:
-                               usage();
-               }
-       }
-
-       if (optind >= (argc + 1)) {
-               fprintf(stderr, "no file specified!\n");
-               usage();
-       }
-
-       file = argv[optind];
-       outfile = argv[optind + 1];
-
-       outfd = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
-       if (outfd < 0) {
-               fprintf(stderr, "could not open \"%s\"\n", outfile);
-               usage();
-       }
-
-       in = fopen(file, "r");
-       if (!in) {
-               fprintf(stderr, "could not open \"%s\"\n", file);
-               usage();
-       }
-
-       yyset_in(in);
-
-       /* if gpu version not specified, infer from filename: */
-       if (!gpuver) {
-               if (strstr(file, "a5")) {
-                       gpuver = 5;
-               } else if (strstr(file, "a6")) {
-                       gpuver = 6;
-               }
-       }
-
-       switch (gpuver) {
-       case 6:
-               name = "A6XX";
-               control_reg_name = "A6XX_CONTROL_REG";
-               break;
-       case 5:
-               name = "A5XX";
-               control_reg_name = "A5XX_CONTROL_REG";
-               break;
-       default:
-               fprintf(stderr, "unknown GPU version!\n");
-               usage();
-       }
-
-       rnn_init();
-       db = rnn_newdb();
-
-       ctx = rnndec_newcontext(db);
-
-       rnn_parsefile(db, "adreno.xml");
-       rnn_prepdb(db);
-       if (db->estatus)
-               errx(db->estatus, "failed to parse register database");
-       dom[0] = rnn_finddomain(db, name);
-       dom[1] = rnn_finddomain(db, "AXXX");
-       control_regs = rnn_finddomain(db, control_reg_name);
-
-       ret = yyparse();
-       if (ret) {
-               fprintf(stderr, "parse failed: %d\n", ret);
-               return ret;
-       }
-
-       emit_instructions(outfd);
-       emit_jumptable(outfd);
-
-       close(outfd);
-
-       return 0;
+   FILE *in;
+   char *file, *outfile, *name, *control_reg_name;
+   int c, ret, outfd;
+
+   /* Argument parsing: */
+   while ((c = getopt(argc, argv, "g:")) != -1) {
+      switch (c) {
+      case 'g':
+         gpuver = atoi(optarg);
+         break;
+      default:
+         usage();
+      }
+   }
+
+   if (optind >= (argc + 1)) {
+      fprintf(stderr, "no file specified!\n");
+      usage();
+   }
+
+   file = argv[optind];
+   outfile = argv[optind + 1];
+
+   outfd = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+   if (outfd < 0) {
+      fprintf(stderr, "could not open \"%s\"\n", outfile);
+      usage();
+   }
+
+   in = fopen(file, "r");
+   if (!in) {
+      fprintf(stderr, "could not open \"%s\"\n", file);
+      usage();
+   }
+
+   yyset_in(in);
+
+   /* if gpu version not specified, infer from filename: */
+   if (!gpuver) {
+      if (strstr(file, "a5")) {
+         gpuver = 5;
+      } else if (strstr(file, "a6")) {
+         gpuver = 6;
+      }
+   }
+
+   switch (gpuver) {
+   case 6:
+      name = "A6XX";
+      control_reg_name = "A6XX_CONTROL_REG";
+      break;
+   case 5:
+      name = "A5XX";
+      control_reg_name = "A5XX_CONTROL_REG";
+      break;
+   default:
+      fprintf(stderr, "unknown GPU version!\n");
+      usage();
+   }
+
+   rnn_init();
+   db = rnn_newdb();
+
+   ctx = rnndec_newcontext(db);
+
+   rnn_parsefile(db, "adreno.xml");
+   rnn_prepdb(db);
+   if (db->estatus)
+      errx(db->estatus, "failed to parse register database");
+   dom[0] = rnn_finddomain(db, name);
+   dom[1] = rnn_finddomain(db, "AXXX");
+   control_regs = rnn_finddomain(db, control_reg_name);
+
+   ret = yyparse();
+   if (ret) {
+      fprintf(stderr, "parse failed: %d\n", ret);
+      return ret;
+   }
+
+   emit_instructions(outfd);
+   emit_jumptable(outfd);
+
+   close(outfd);
+
+   return 0;
 }
index e0e6033..3f5931f 100644 (file)
@@ -24,8 +24,8 @@
 #ifndef _ASM_H_
 #define _ASM_H_
 
-#include <stdint.h>
 #include <stdbool.h>
+#include <stdint.h>
 #include "afuc.h"
 
 extern int gpuver;
@@ -37,78 +37,77 @@ extern int gpuver;
  * about the different encodings for 2src regs vs 1src+immed, or mnemonics
  */
 struct asm_instruction {
-       int tok;
-       int dst;
-       int src1;
-       int src2;
-       int immed;
-       int shift;
-       int bit;
-       int xmov;
-       uint32_t literal;
-       const char *label;
-
-       bool has_immed : 1;
-       bool has_shift : 1;
-       bool has_bit   : 1;
-       bool is_literal : 1;
-       bool rep        : 1;
+   int tok;
+   int dst;
+   int src1;
+   int src2;
+   int immed;
+   int shift;
+   int bit;
+   int xmov;
+   uint32_t literal;
+   const char *label;
+
+   bool has_immed : 1;
+   bool has_shift : 1;
+   bool has_bit : 1;
+   bool is_literal : 1;
+   bool rep : 1;
 };
 
 struct asm_label {
-       unsigned offset;
-       const char *label;
+   unsigned offset;
+   const char *label;
 };
 
 struct asm_instruction *next_instr(int tok);
 void decl_label(const char *str);
 
-
 static inline uint32_t
 parse_reg(const char *str)
 {
-       char *retstr;
-       long int ret;
-
-       if (!strcmp(str, "$rem"))
-               return 0x1c;
-       else if (!strcmp(str, "$addr"))
-               return 0x1d;
-       else if (!strcmp(str, "$addr2"))
-               return 0x1e;
-       else if (!strcmp(str, "$data"))
-               return 0x1f;
-
-       ret = strtol(str + 1, &retstr, 16);
-
-       if (*retstr != '\0') {
-               printf("invalid register: %s\n", str);
-               exit(2);
-       }
-
-       return ret;
+   char *retstr;
+   long int ret;
+
+   if (!strcmp(str, "$rem"))
+      return 0x1c;
+   else if (!strcmp(str, "$addr"))
+      return 0x1d;
+   else if (!strcmp(str, "$addr2"))
+      return 0x1e;
+   else if (!strcmp(str, "$data"))
+      return 0x1f;
+
+   ret = strtol(str + 1, &retstr, 16);
+
+   if (*retstr != '\0') {
+      printf("invalid register: %s\n", str);
+      exit(2);
+   }
+
+   return ret;
 }
 
 static inline uint32_t
 parse_literal(const char *str)
 {
-       char *retstr;
-       long int ret;
+   char *retstr;
+   long int ret;
 
-       ret = strtol(str + 1, &retstr, 16);
+   ret = strtol(str + 1, &retstr, 16);
 
-       if (*retstr != ']') {
-               printf("invalid literal: %s\n", str);
-               exit(2);
-       }
+   if (*retstr != ']') {
+      printf("invalid literal: %s\n", str);
+      exit(2);
+   }
 
-       return ret;
+   return ret;
 }
 
 static inline uint32_t
 parse_bit(const char *str)
 {
-       return strtol(str + 1, NULL, 10);
+   return strtol(str + 1, NULL, 10);
 }
 
 unsigned parse_control_reg(const char *name);
@@ -117,12 +116,11 @@ unsigned parse_control_reg(const char *name);
 static inline const char *
 parse_label_decl(const char *str)
 {
-       char *s = strdup(str);
-       s[strlen(s) - 1] = '\0';
-       return s;
+   char *s = strdup(str);
+   s[strlen(s) - 1] = '\0';
+   return s;
 }
 
-void yyset_in (FILE *  _in_str );
-
+void yyset_in(FILE *_in_str);
 
 #endif /* _ASM_H_ */
index 0a62ba7..b761cac 100644 (file)
  * SOFTWARE.
  */
 
+#include <assert.h>
 #include <err.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <unistd.h>
 #include <fcntl.h>
+#include <getopt.h>
 #include <stdarg.h>
-#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
-#include <assert.h>
-#include <getopt.h>
+#include <unistd.h>
 
 #include "util/os_file.h"
 
@@ -41,7 +41,6 @@
 
 static int gpuver;
 
-
 static struct rnndeccontext *ctx;
 static struct rnndb *db;
 static struct rnndomain *control_regs;
@@ -54,816 +53,827 @@ const char *variant;
  */
 static bool verbose = false;
 
-static void print_gpu_reg(uint32_t regbase)
+static void
+print_gpu_reg(uint32_t regbase)
 {
-       struct rnndomain *d = NULL;
-
-       if (regbase < 0x100)
-               return;
-
-       if (rnndec_checkaddr(ctx, dom[0], regbase, 0))
-               d = dom[0];
-       else if (rnndec_checkaddr(ctx, dom[1], regbase, 0))
-               d = dom[1];
-
-       if (d) {
-               struct rnndecaddrinfo *info = rnndec_decodeaddr(ctx, d, regbase, 0);
-               if (info) {
-                       printf("\t; %s", info->name);
-                       free(info->name);
-                       free(info);
-                       return;
-               }
-       }
+   struct rnndomain *d = NULL;
+
+   if (regbase < 0x100)
+      return;
+
+   if (rnndec_checkaddr(ctx, dom[0], regbase, 0))
+      d = dom[0];
+   else if (rnndec_checkaddr(ctx, dom[1], regbase, 0))
+      d = dom[1];
+
+   if (d) {
+      struct rnndecaddrinfo *info = rnndec_decodeaddr(ctx, d, regbase, 0);
+      if (info) {
+         printf("\t; %s", info->name);
+         free(info->name);
+         free(info);
+         return;
+      }
+   }
 }
 
-static void printc(const char *c, const char *fmt, ...)
+static void
+printc(const char *c, const char *fmt, ...)
 {
-       va_list args;
-       printf("%s", c);
-       va_start(args, fmt);
-       vprintf(fmt, args);
-       va_end(args);
-       printf("%s", ctx->colors->reset);
+   va_list args;
+   printf("%s", c);
+   va_start(args, fmt);
+   vprintf(fmt, args);
+   va_end(args);
+   printf("%s", ctx->colors->reset);
 }
 
 #define printerr(fmt, ...) printc(ctx->colors->err, fmt, ##__VA_ARGS__)
 #define printlbl(fmt, ...) printc(ctx->colors->btarg, fmt, ##__VA_ARGS__)
 
-static void print_reg(unsigned reg)
+static void
+print_reg(unsigned reg)
 {
-// XXX seems like *reading* $00 --> literal zero??
-// seems like read from $1c gives packet remaining len??
-// $01 current packet header, writing to $01 triggers
-// parsing header and jumping to appropriate handler.
-       if (reg == 0x1c)
-               printf("$rem");      /* remainding dwords in packet */
-       else if (reg == 0x1d)
-               printf("$addr");
-       else if (reg == 0x1e)
-               printf("$addr2");   // XXX
-       else if (reg == 0x1f)
-               printf("$data");
-       else
-               printf("$%02x", reg);
+   // XXX seems like *reading* $00 --> literal zero??
+   // seems like read from $1c gives packet remaining len??
+   // $01 current packet header, writing to $01 triggers
+   // parsing header and jumping to appropriate handler.
+   if (reg == 0x1c)
+      printf("$rem"); /* remainding dwords in packet */
+   else if (reg == 0x1d)
+      printf("$addr");
+   else if (reg == 0x1e)
+      printf("$addr2"); // XXX
+   else if (reg == 0x1f)
+      printf("$data");
+   else
+      printf("$%02x", reg);
 }
 
-static void print_src(unsigned reg)
+static void
+print_src(unsigned reg)
 {
-       print_reg(reg);
+   print_reg(reg);
 }
 
-static void print_dst(unsigned reg)
+static void
+print_dst(unsigned reg)
 {
-       print_reg(reg);
+   print_reg(reg);
 }
 
-static void print_alu_name(afuc_opc opc, uint32_t instr)
+static void
+print_alu_name(afuc_opc opc, uint32_t instr)
 {
-       if (opc == OPC_ADD) {
-               printf("add ");
-       } else if (opc == OPC_ADDHI) {
-               printf("addhi ");
-       } else if (opc == OPC_SUB) {
-               printf("sub ");
-       } else if (opc == OPC_SUBHI) {
-               printf("subhi ");
-       } else if (opc == OPC_AND) {
-               printf("and ");
-       } else if (opc == OPC_OR) {
-               printf("or ");
-       } else if (opc == OPC_XOR) {
-               printf("xor ");
-       } else if (opc == OPC_NOT) {
-               printf("not ");
-       } else if (opc == OPC_SHL) {
-               printf("shl ");
-       } else if (opc == OPC_USHR) {
-               printf("ushr ");
-       } else if (opc == OPC_ISHR) {
-               printf("ishr ");
-       } else if (opc == OPC_ROT) {
-               printf("rot ");
-       } else if (opc == OPC_MUL8) {
-               printf("mul8 ");
-       } else if (opc == OPC_MIN) {
-               printf("min ");
-       } else if (opc == OPC_MAX) {
-               printf("max ");
-       } else if (opc == OPC_CMP) {
-               printf("cmp ");
-       } else if (opc == OPC_MSB) {
-               printf("msb ");
-       } else {
-               printerr("[%08x]", instr);
-               printf("  ; alu%02x ", opc);
-       }
+   if (opc == OPC_ADD) {
+      printf("add ");
+   } else if (opc == OPC_ADDHI) {
+      printf("addhi ");
+   } else if (opc == OPC_SUB) {
+      printf("sub ");
+   } else if (opc == OPC_SUBHI) {
+      printf("subhi ");
+   } else if (opc == OPC_AND) {
+      printf("and ");
+   } else if (opc == OPC_OR) {
+      printf("or ");
+   } else if (opc == OPC_XOR) {
+      printf("xor ");
+   } else if (opc == OPC_NOT) {
+      printf("not ");
+   } else if (opc == OPC_SHL) {
+      printf("shl ");
+   } else if (opc == OPC_USHR) {
+      printf("ushr ");
+   } else if (opc == OPC_ISHR) {
+      printf("ishr ");
+   } else if (opc == OPC_ROT) {
+      printf("rot ");
+   } else if (opc == OPC_MUL8) {
+      printf("mul8 ");
+   } else if (opc == OPC_MIN) {
+      printf("min ");
+   } else if (opc == OPC_MAX) {
+      printf("max ");
+   } else if (opc == OPC_CMP) {
+      printf("cmp ");
+   } else if (opc == OPC_MSB) {
+      printf("msb ");
+   } else {
+      printerr("[%08x]", instr);
+      printf("  ; alu%02x ", opc);
+   }
 }
 
-static const char *getpm4(uint32_t id)
+static const char *
+getpm4(uint32_t id)
 {
-       return rnndec_decode_enum(ctx, "adreno_pm4_type3_packets", id);
+   return rnndec_decode_enum(ctx, "adreno_pm4_type3_packets", id);
 }
 
 static inline unsigned
 _odd_parity_bit(unsigned val)
 {
-       /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
-        * note that we want odd parity so 0x6996 is inverted.
-        */
-       val ^= val >> 16;
-       val ^= val >> 8;
-       val ^= val >> 4;
-       val &= 0xf;
-       return (~0x6996 >> val) & 1;
+   /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
+    * note that we want odd parity so 0x6996 is inverted.
+    */
+   val ^= val >> 16;
+   val ^= val >> 8;
+   val ^= val >> 4;
+   val &= 0xf;
+   return (~0x6996 >> val) & 1;
 }
 
 static struct {
-       uint32_t offset;
-       uint32_t num_jump_labels;
-       uint32_t jump_labels[256];
+   uint32_t offset;
+   uint32_t num_jump_labels;
+   uint32_t jump_labels[256];
 } jump_labels[1024];
 int num_jump_labels;
 
-static void add_jump_table_entry(uint32_t n, uint32_t offset)
+static void
+add_jump_table_entry(uint32_t n, uint32_t offset)
 {
-       int i;
+   int i;
 
-       if (n > 128) /* can't possibly be a PM4 PKT3.. */
-               return;
+   if (n > 128) /* can't possibly be a PM4 PKT3.. */
+      return;
 
-       for (i = 0; i < num_jump_labels; i++)
-               if (jump_labels[i].offset == offset)
-                       goto add_label;
+   for (i = 0; i < num_jump_labels; i++)
+      if (jump_labels[i].offset == offset)
+         goto add_label;
 
-       num_jump_labels = i + 1;
-       jump_labels[i].offset = offset;
-       jump_labels[i].num_jump_labels = 0;
+   num_jump_labels = i + 1;
+   jump_labels[i].offset = offset;
+   jump_labels[i].num_jump_labels = 0;
 
 add_label:
-       jump_labels[i].jump_labels[jump_labels[i].num_jump_labels++] = n;
-       assert(jump_labels[i].num_jump_labels < 256);
+   jump_labels[i].jump_labels[jump_labels[i].num_jump_labels++] = n;
+   assert(jump_labels[i].num_jump_labels < 256);
 }
 
-static int get_jump_table_entry(uint32_t offset)
+static int
+get_jump_table_entry(uint32_t offset)
 {
-       int i;
+   int i;
 
-       for (i = 0; i < num_jump_labels; i++)
-               if (jump_labels[i].offset == offset)
-                       return i;
+   for (i = 0; i < num_jump_labels; i++)
+      if (jump_labels[i].offset == offset)
+         return i;
 
-       return -1;
+   return -1;
 }
 
 static uint32_t label_offsets[0x512];
 static int num_label_offsets;
 
-static int label_idx(uint32_t offset, bool create)
+static int
+label_idx(uint32_t offset, bool create)
 {
-       int i;
-       for (i = 0; i < num_label_offsets; i++)
-               if (offset == label_offsets[i])
-                       return i;
-       if (!create)
-               return -1;
-       label_offsets[i] = offset;
-       num_label_offsets = i+1;
-       return i;
+   int i;
+   for (i = 0; i < num_label_offsets; i++)
+      if (offset == label_offsets[i])
+         return i;
+   if (!create)
+      return -1;
+   label_offsets[i] = offset;
+   num_label_offsets = i + 1;
+   return i;
 }
 
 static const char *
 label_name(uint32_t offset, bool allow_jt)
 {
-       static char name[12];
-       int lidx;
-
-       if (allow_jt) {
-               lidx = get_jump_table_entry(offset);
-               if (lidx >= 0) {
-                       int j;
-                       for (j = 0; j < jump_labels[lidx].num_jump_labels; j++) {
-                               uint32_t jump_label = jump_labels[lidx].jump_labels[j];
-                               const char *str = getpm4(jump_label);
-                               if (str)
-                                       return str;
-                       }
-                       // if we don't find anything w/ known name, maybe we should
-                       // return UNKN%d to at least make it clear that this is some
-                       // sort of jump-table entry?
-               }
-       }
-
-       lidx = label_idx(offset, false);
-       if (lidx < 0)
-               return NULL;
-       sprintf(name, "l%03d", lidx);
-       return name;
+   static char name[12];
+   int lidx;
+
+   if (allow_jt) {
+      lidx = get_jump_table_entry(offset);
+      if (lidx >= 0) {
+         int j;
+         for (j = 0; j < jump_labels[lidx].num_jump_labels; j++) {
+            uint32_t jump_label = jump_labels[lidx].jump_labels[j];
+            const char *str = getpm4(jump_label);
+            if (str)
+               return str;
+         }
+         // if we don't find anything w/ known name, maybe we should
+         // return UNKN%d to at least make it clear that this is some
+         // sort of jump-table entry?
+      }
+   }
+
+   lidx = label_idx(offset, false);
+   if (lidx < 0)
+      return NULL;
+   sprintf(name, "l%03d", lidx);
+   return name;
 }
 
-
 static uint32_t fxn_offsets[0x512];
 static int num_fxn_offsets;
 
-static int fxn_idx(uint32_t offset, bool create)
+static int
+fxn_idx(uint32_t offset, bool create)
 {
-       int i;
-       for (i = 0; i < num_fxn_offsets; i++)
-               if (offset == fxn_offsets[i])
-                       return i;
-       if (!create)
-               return -1;
-       fxn_offsets[i] = offset;
-       num_fxn_offsets = i+1;
-       return i;
+   int i;
+   for (i = 0; i < num_fxn_offsets; i++)
+      if (offset == fxn_offsets[i])
+         return i;
+   if (!create)
+      return -1;
+   fxn_offsets[i] = offset;
+   num_fxn_offsets = i + 1;
+   return i;
 }
 
 static const char *
 fxn_name(uint32_t offset)
 {
-       static char name[14];
-       int fidx = fxn_idx(offset, false);
-       if (fidx < 0)
-               return NULL;
-       sprintf(name, "fxn%02d", fidx);
-       return name;
+   static char name[14];
+   int fidx = fxn_idx(offset, false);
+   if (fidx < 0)
+      return NULL;
+   sprintf(name, "fxn%02d", fidx);
+   return name;
 }
 
-static void print_control_reg(uint32_t id)
+static void
+print_control_reg(uint32_t id)
 {
-       if (rnndec_checkaddr(ctx, control_regs, id, 0)) {
-               struct rnndecaddrinfo *info = rnndec_decodeaddr(ctx, control_regs, id, 0);
-               printf("@%s", info->name);
-               free(info->name);
-               free(info);
-       } else {
-               printf("0x%03x", id);
-       }
+   if (rnndec_checkaddr(ctx, control_regs, id, 0)) {
+      struct rnndecaddrinfo *info = rnndec_decodeaddr(ctx, control_regs, id, 0);
+      printf("@%s", info->name);
+      free(info->name);
+      free(info);
+   } else {
+      printf("0x%03x", id);
+   }
 }
 
-static void disasm(uint32_t *buf, int sizedwords)
+static void
+disasm(uint32_t *buf, int sizedwords)
 {
-       uint32_t *instrs = buf;
-       const int jmptbl_start = instrs[1] & 0xffff;
-       uint32_t *jmptbl = &buf[jmptbl_start];
-       afuc_opc opc;
-       bool rep;
-       int i;
-
-
-       /* parse jumptable: */
-       for (i = 0; i < 0x80; i++) {
-               unsigned offset = jmptbl[i];
-               unsigned n = i;// + CP_NOP;
-               add_jump_table_entry(n, offset);
-       }
-
-       /* do a pre-pass to find instructions that are potential branch targets,
-        * and add labels for them:
-        */
-       for (i = 0; i < jmptbl_start; i++) {
-               afuc_instr *instr = (void *)&instrs[i];
-
-               afuc_get_opc(instr, &opc, &rep);
-
-               switch (opc) {
-               case OPC_BRNEI:
-               case OPC_BREQI:
-               case OPC_BRNEB:
-               case OPC_BREQB:
-                       label_idx(i + instr->br.ioff, true);
-                       break;
-               case OPC_PREEMPTLEAVE6:
-                       if (gpuver >= 6)
-                               label_idx(instr->call.uoff, true);
-                       break;
-               case OPC_CALL:
-                       fxn_idx(instr->call.uoff, true);
-                       break;
-               case OPC_SETSECURE:
-                       /* this implicitly jumps to pc + 3 if successful */
-                       label_idx(i + 3, true);
-                       break;
-               default:
-                       break;
-               }
-       }
-
-       /* print instructions: */
-       for (i = 0; i < jmptbl_start; i++) {
-               int jump_label_idx;
-               afuc_instr *instr = (void *)&instrs[i];
-               const char *fname, *lname;
-               afuc_opc opc;
-               bool rep;
-
-               afuc_get_opc(instr, &opc, &rep);
-
-               lname = label_name(i, false);
-               fname = fxn_name(i);
-               jump_label_idx = get_jump_table_entry(i);
-
-               if (jump_label_idx >= 0) {
-                       int j;
-                       printf("\n");
-                       for (j = 0; j < jump_labels[jump_label_idx].num_jump_labels; j++) {
-                               uint32_t jump_label = jump_labels[jump_label_idx].jump_labels[j];
-                               const char *name = getpm4(jump_label);
-                               if (name) {
-                                       printlbl("%s", name);
-                               } else {
-                                       printlbl("UNKN%d", jump_label);
-                               }
-                               printf(":\n");
-                       }
-               }
-
-               if (fname) {
-                       printlbl("%s", fname);
-                       printf(":\n");
-               }
-
-               if (lname) {
-                       printlbl(" %s", lname);
-                       printf(":");
-               } else {
-                       printf("      ");
-               }
-
-
-               if (verbose) {
-                       printf("\t%04x: %08x  ", i, instrs[i]);
-               } else {
-                       printf("  ");
-               }
-
-               switch (opc) {
-               case OPC_NOP: {
-                       /* a6xx changed the default immediate, and apparently 0
-                        * is illegal now.
-                        */
-                       const uint32_t nop = gpuver >= 6 ? 0x1000000 : 0x0;
-                       if (instrs[i] != nop) {
-                               printerr("[%08x]", instrs[i]);
-                               printf("  ; ");
-                       }
-                       if (rep)
-                               printf("(rep)");
-                       printf("nop");
-                       print_gpu_reg(instrs[i]);
-
-                       break;
-               }
-               case OPC_ADD:
-               case OPC_ADDHI:
-               case OPC_SUB:
-               case OPC_SUBHI:
-               case OPC_AND:
-               case OPC_OR:
-               case OPC_XOR:
-               case OPC_NOT:
-               case OPC_SHL:
-               case OPC_USHR:
-               case OPC_ISHR:
-               case OPC_ROT:
-               case OPC_MUL8:
-               case OPC_MIN:
-               case OPC_MAX:
-               case OPC_CMP: {
-                       bool src1 = true;
-
-                       if (opc == OPC_NOT)
-                               src1 = false;
-
-                       if (rep)
-                               printf("(rep)");
-
-                       print_alu_name(opc, instrs[i]);
-                       print_dst(instr->alui.dst);
-                       printf(", ");
-                       if (src1) {
-                               print_src(instr->alui.src);
-                               printf(", ");
-                       }
-                       printf("0x%04x", instr->alui.uimm);
-                       print_gpu_reg(instr->alui.uimm);
-
-                       /* print out unexpected bits: */
-                       if (verbose) {
-                               if (instr->alui.src && !src1)
-                                       printerr("  (src=%02x)", instr->alui.src);
-                       }
-
-                       break;
-               }
-               case OPC_MOVI: {
-                       if (rep)
-                               printf("(rep)");
-                       printf("mov ");
-                       print_dst(instr->movi.dst);
-                       printf(", 0x%04x", instr->movi.uimm);
-                       if (instr->movi.shift)
-                               printf(" << %u", instr->movi.shift);
-
-                       /* using mov w/ << 16 is popular way to construct a pkt7
-                        * header to send (for ex, from PFP to ME), so check that
-                        * case first
-                        */
-                       if ((instr->movi.shift == 16) &&
-                                       ((instr->movi.uimm & 0xff00) == 0x7000)) {
-                               unsigned opc, p;
-
-                               opc = instr->movi.uimm & 0x7f;
-                               p = _odd_parity_bit(opc);
-
-                               /* So, you'd think that checking the parity bit would be
-                                * a good way to rule out false positives, but seems like
-                                * ME doesn't really care.. at least it would filter out
-                                * things that look like actual legit packets between
-                                * PFP and ME..
-                                */
-                               if (1 || p == ((instr->movi.uimm >> 7) & 0x1)) {
-                                       const char *name = getpm4(opc);
-                                       printf("\t; ");
-                                       if (name)
-                                               printlbl("%s", name);
-                                       else
-                                               printlbl("UNKN%u", opc);
-                                       break;
-                               }
-                       }
-
-                       print_gpu_reg(instr->movi.uimm << instr->movi.shift);
-
-                       break;
-               }
-               case OPC_ALU: {
-                       bool src1 = true;
-
-                       if (instr->alu.alu == OPC_NOT || instr->alu.alu == OPC_MSB)
-                               src1 = false;
-
-                       if (instr->alu.pad)
-                               printf("[%08x]  ; ", instrs[i]);
-
-                       if (rep)
-                               printf("(rep)");
-                       if (instr->alu.xmov)
-                               printf("(xmov%d)", instr->alu.xmov);
-
-                       /* special case mnemonics:
-                        *   reading $00 seems to always yield zero, and so:
-                        *      or $dst, $00, $src -> mov $dst, $src
-                        *   Maybe add one for negate too, ie.
-                        *      sub $dst, $00, $src ???
-                        */
-                       if ((instr->alu.alu == OPC_OR) && !instr->alu.src1) {
-                               printf("mov ");
-                               src1 = false;
-                       } else {
-                               print_alu_name(instr->alu.alu, instrs[i]);
-                       }
-
-                       print_dst(instr->alu.dst);
-                       if (src1) {
-                               printf(", ");
-                               print_src(instr->alu.src1);
-                       }
-                       printf(", ");
-                       print_src(instr->alu.src2);
-
-                       /* print out unexpected bits: */
-                       if (verbose) {
-                               if (instr->alu.pad)
-                                       printerr("  (pad=%01x)", instr->alu.pad);
-                               if (instr->alu.src1 && !src1)
-                                       printerr("  (src1=%02x)", instr->alu.src1);
-                       }
-
-                       /* xmov is a modifier that makes the processor execute up to 3
-                        * extra mov's after the current instruction. Given an ALU
-                        * instruction:
-                        *
-                        * (xmovN) alu $dst, $src1, $src2
-                        *
-                        * In all of the uses in the firmware blob, $dst and $src2 are one
-                        * of the "special" registers $data, $addr, $addr2. I've observed
-                        * that if $dst isn't "special" then it's replaced with $00
-                        * instead of $data, but I haven't checked what happens if $src2
-                        * isn't "special".  Anyway, in the usual case, the HW produces a
-                        * count M = min(N, $rem) and then does the following:
-                        *
-                        * M = 1:
-                        * mov $data, $src2
-                        *
-                        * M = 2:
-                        * mov $data, $src2
-                        * mov $data, $src2
-                        *
-                        * M = 3:
-                        * mov $data, $src2
-                        * mov $dst, $src2 (special case for CP_CONTEXT_REG_BUNCH)
-                        * mov $data, $src2
-                        *
-                        * It seems to be frequently used in combination with (rep) to
-                        * provide a kind of hardware-based loop unrolling, and there's
-                        * even a special case in the ISA to be able to do this with
-                        * CP_CONTEXT_REG_BUNCH. However (rep) isn't required.
-                        *
-                        * This dumps the expected extra instructions, assuming that $rem
-                        * isn't too small.
-                        */
-                       if (verbose && instr->alu.xmov) {
-                               for (int i = 0; i < instr->alu.xmov; i++) {
-                                       printf("\n        ; mov ");
-                                       if (instr->alu.dst < 0x1d)
-                                               printf("$00");
-                                       else if (instr->alu.xmov == 3 && i == 1)
-                                               print_dst(instr->alu.dst);
-                                       else
-                                               printf("$data");
-                                       printf(", ");
-                                       print_src(instr->alu.src2);
-                               }
-                       }
-
-                       break;
-               }
-               case OPC_CWRITE6:
-               case OPC_CREAD6:
-               case OPC_STORE6:
-               case OPC_LOAD6: {
-                       if (rep)
-                               printf("(rep)");
-
-                       bool is_control_reg = true;
-                       if (gpuver >= 6) {
-                               switch (opc) {
-                               case OPC_CWRITE6:
-                                       printf("cwrite ");
-                                       break;
-                               case OPC_CREAD6:
-                                       printf("cread ");
-                                       break;
-                               case OPC_STORE6:
-                                       is_control_reg = false;
-                                       printf("store ");
-                                       break;
-                               case OPC_LOAD6:
-                                       is_control_reg = false;
-                                       printf("load ");
-                                       break;
-                               default:
-                                       assert(!"unreachable");
-                               }
-                       } else {
-                               switch (opc) {
-                               case OPC_CWRITE5:
-                                       printf("cwrite ");
-                                       break;
-                               case OPC_CREAD5:
-                                       printf("cread ");
-                                       break;
-                               default:
-                                       fprintf(stderr, "A6xx control opcode on A5xx?\n");
-                                       exit(1);
-                               }
-                       }
-
-                       print_src(instr->control.src1);
-                       printf(", [");
-                       print_src(instr->control.src2);
-                       printf(" + ");
-                       if (is_control_reg && instr->control.flags != 0x4)
-                               print_control_reg(instr->control.uimm);
-                       else
-                               printf("0x%03x", instr->control.uimm);
-                       printf("], 0x%x", instr->control.flags);
-                       break;
-               }
-               case OPC_BRNEI:
-               case OPC_BREQI:
-               case OPC_BRNEB:
-               case OPC_BREQB: {
-                       unsigned off = i + instr->br.ioff;
-
-                       assert(!rep);
-
-                       /* Since $00 reads back zero, it can be used as src for
-                        * unconditional branches.  (This only really makes sense
-                        * for the BREQB.. or possible BRNEI if imm==0.)
-                        *
-                        * If bit=0 then branch is taken if *all* bits are zero.
-                        * Otherwise it is taken if bit (bit-1) is clear.
-                        *
-                        * Note the instruction after a jump/branch is executed
-                        * regardless of whether branch is taken, so use nop or
-                        * take that into account in code.
-                        */
-                       if (instr->br.src || (opc != OPC_BRNEB)) {
-                               bool immed = false;
-
-                               if (opc == OPC_BRNEI) {
-                                       printf("brne ");
-                                       immed = true;
-                               } else if (opc == OPC_BREQI) {
-                                       printf("breq ");
-                                       immed = true;
-                               } else if (opc == OPC_BRNEB) {
-                                       printf("brne ");
-                               } else if (opc == OPC_BREQB) {
-                                       printf("breq ");
-                               }
-                               print_src(instr->br.src);
-                               if (immed) {
-                                       printf(", 0x%x,", instr->br.bit_or_imm);
-                               } else {
-                                       printf(", b%u,", instr->br.bit_or_imm);
-                               }
-                       } else {
-                               printf("jump");
-                               if (verbose && instr->br.bit_or_imm) {
-                                       printerr("  (src=%03x, bit=%03x) ",
-                                               instr->br.src, instr->br.bit_or_imm);
-                               }
-                       }
-
-                       printf(" #");
-                       printlbl("%s", label_name(off, true));
-                       if (verbose)
-                               printf(" (#%d, %04x)", instr->br.ioff, off);
-                       break;
-               }
-               case OPC_CALL:
-                       assert(!rep);
-                       printf("call #");
-                       printlbl("%s", fxn_name(instr->call.uoff));
-                       if (verbose) {
-                               printf(" (%04x)", instr->call.uoff);
-                               if (instr->br.bit_or_imm || instr->br.src) {
-                                       printerr("  (src=%03x, bit=%03x) ",
-                                               instr->br.src, instr->br.bit_or_imm);
-                               }
-                       }
-                       break;
-               case OPC_RET:
-                       assert(!rep);
-                       if (instr->ret.pad)
-                               printf("[%08x]  ; ", instrs[i]);
-                       if (instr->ret.interrupt)
-                               printf("iret");
-                       else
-                               printf("ret");
-                       break;
-               case OPC_WIN:
-                       assert(!rep);
-                       if (instr->waitin.pad)
-                               printf("[%08x]  ; ", instrs[i]);
-                       printf("waitin");
-                       if (verbose && instr->waitin.pad)
-                               printerr("  (pad=%x)", instr->waitin.pad);
-                       break;
-               case OPC_PREEMPTLEAVE6:
-                       if (gpuver < 6) {
-                               printf("[%08x]  ; op38", instrs[i]);
-                       } else {
-                               printf("preemptleave #");
-                               printlbl("%s", label_name(instr->call.uoff, true));
-                       }
-                       break;
-               case OPC_SETSECURE:
-                       /* Note: This seems to implicitly read the secure/not-secure state
-                        * to set from the low bit of $02, and implicitly jumps to pc + 3
-                        * (i.e. skipping the next two instructions) if it succeeds. We
-                        * print these implicit parameters to make reading the disassembly
-                        * easier.
-                        */
-                       if (instr->pad)
-                               printf("[%08x]  ; ", instrs[i]);
-                       printf("setsecure $02, #");
-                       printlbl("%s", label_name(i + 3, true));
-                       break;
-               default:
-                       printerr("[%08x]", instrs[i]);
-                       printf("  ; op%02x ", opc);
-                       print_dst(instr->alui.dst);
-                       printf(", ");
-                       print_src(instr->alui.src);
-                       print_gpu_reg(instrs[i] & 0xffff);
-                       break;
-               }
-               printf("\n");
-       }
-
-       /* print jumptable: */
-       if (verbose) {
-               printf(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n");
-               printf("; JUMP TABLE\n");
-               for (i = 0; i < 0x7f; i++) {
-                       int n = i;// + CP_NOP;
-                       uint32_t offset = jmptbl[i];
-                       const char *name = getpm4(n);
-                       printf("%3d %02x: ", n, n);
-                       printf("%04x", offset);
-                       if (name) {
-                               printf("   ; %s", name);
-                       } else {
-                               printf("   ; UNKN%d", n);
-                       }
-                       printf("\n");
-               }
-       }
+   uint32_t *instrs = buf;
+   const int jmptbl_start = instrs[1] & 0xffff;
+   uint32_t *jmptbl = &buf[jmptbl_start];
+   afuc_opc opc;
+   bool rep;
+   int i;
+
+   /* parse jumptable: */
+   for (i = 0; i < 0x80; i++) {
+      unsigned offset = jmptbl[i];
+      unsigned n = i; // + CP_NOP;
+      add_jump_table_entry(n, offset);
+   }
+
+   /* do a pre-pass to find instructions that are potential branch targets,
+    * and add labels for them:
+    */
+   for (i = 0; i < jmptbl_start; i++) {
+      afuc_instr *instr = (void *)&instrs[i];
+
+      afuc_get_opc(instr, &opc, &rep);
+
+      switch (opc) {
+      case OPC_BRNEI:
+      case OPC_BREQI:
+      case OPC_BRNEB:
+      case OPC_BREQB:
+         label_idx(i + instr->br.ioff, true);
+         break;
+      case OPC_PREEMPTLEAVE6:
+         if (gpuver >= 6)
+            label_idx(instr->call.uoff, true);
+         break;
+      case OPC_CALL:
+         fxn_idx(instr->call.uoff, true);
+         break;
+      case OPC_SETSECURE:
+         /* this implicitly jumps to pc + 3 if successful */
+         label_idx(i + 3, true);
+         break;
+      default:
+         break;
+      }
+   }
+
+   /* print instructions: */
+   for (i = 0; i < jmptbl_start; i++) {
+      int jump_label_idx;
+      afuc_instr *instr = (void *)&instrs[i];
+      const char *fname, *lname;
+      afuc_opc opc;
+      bool rep;
+
+      afuc_get_opc(instr, &opc, &rep);
+
+      lname = label_name(i, false);
+      fname = fxn_name(i);
+      jump_label_idx = get_jump_table_entry(i);
+
+      if (jump_label_idx >= 0) {
+         int j;
+         printf("\n");
+         for (j = 0; j < jump_labels[jump_label_idx].num_jump_labels; j++) {
+            uint32_t jump_label = jump_labels[jump_label_idx].jump_labels[j];
+            const char *name = getpm4(jump_label);
+            if (name) {
+               printlbl("%s", name);
+            } else {
+               printlbl("UNKN%d", jump_label);
+            }
+            printf(":\n");
+         }
+      }
+
+      if (fname) {
+         printlbl("%s", fname);
+         printf(":\n");
+      }
+
+      if (lname) {
+         printlbl(" %s", lname);
+         printf(":");
+      } else {
+         printf("      ");
+      }
+
+      if (verbose) {
+         printf("\t%04x: %08x  ", i, instrs[i]);
+      } else {
+         printf("  ");
+      }
+
+      switch (opc) {
+      case OPC_NOP: {
+         /* a6xx changed the default immediate, and apparently 0
+          * is illegal now.
+          */
+         const uint32_t nop = gpuver >= 6 ? 0x1000000 : 0x0;
+         if (instrs[i] != nop) {
+            printerr("[%08x]", instrs[i]);
+            printf("  ; ");
+         }
+         if (rep)
+            printf("(rep)");
+         printf("nop");
+         print_gpu_reg(instrs[i]);
+
+         break;
+      }
+      case OPC_ADD:
+      case OPC_ADDHI:
+      case OPC_SUB:
+      case OPC_SUBHI:
+      case OPC_AND:
+      case OPC_OR:
+      case OPC_XOR:
+      case OPC_NOT:
+      case OPC_SHL:
+      case OPC_USHR:
+      case OPC_ISHR:
+      case OPC_ROT:
+      case OPC_MUL8:
+      case OPC_MIN:
+      case OPC_MAX:
+      case OPC_CMP: {
+         bool src1 = true;
+
+         if (opc == OPC_NOT)
+            src1 = false;
+
+         if (rep)
+            printf("(rep)");
+
+         print_alu_name(opc, instrs[i]);
+         print_dst(instr->alui.dst);
+         printf(", ");
+         if (src1) {
+            print_src(instr->alui.src);
+            printf(", ");
+         }
+         printf("0x%04x", instr->alui.uimm);
+         print_gpu_reg(instr->alui.uimm);
+
+         /* print out unexpected bits: */
+         if (verbose) {
+            if (instr->alui.src && !src1)
+               printerr("  (src=%02x)", instr->alui.src);
+         }
+
+         break;
+      }
+      case OPC_MOVI: {
+         if (rep)
+            printf("(rep)");
+         printf("mov ");
+         print_dst(instr->movi.dst);
+         printf(", 0x%04x", instr->movi.uimm);
+         if (instr->movi.shift)
+            printf(" << %u", instr->movi.shift);
+
+         /* using mov w/ << 16 is popular way to construct a pkt7
+          * header to send (for ex, from PFP to ME), so check that
+          * case first
+          */
+         if ((instr->movi.shift == 16) &&
+             ((instr->movi.uimm & 0xff00) == 0x7000)) {
+            unsigned opc, p;
+
+            opc = instr->movi.uimm & 0x7f;
+            p = _odd_parity_bit(opc);
+
+            /* So, you'd think that checking the parity bit would be
+             * a good way to rule out false positives, but seems like
+             * ME doesn't really care.. at least it would filter out
+             * things that look like actual legit packets between
+             * PFP and ME..
+             */
+            if (1 || p == ((instr->movi.uimm >> 7) & 0x1)) {
+               const char *name = getpm4(opc);
+               printf("\t; ");
+               if (name)
+                  printlbl("%s", name);
+               else
+                  printlbl("UNKN%u", opc);
+               break;
+            }
+         }
+
+         print_gpu_reg(instr->movi.uimm << instr->movi.shift);
+
+         break;
+      }
+      case OPC_ALU: {
+         bool src1 = true;
+
+         if (instr->alu.alu == OPC_NOT || instr->alu.alu == OPC_MSB)
+            src1 = false;
+
+         if (instr->alu.pad)
+            printf("[%08x]  ; ", instrs[i]);
+
+         if (rep)
+            printf("(rep)");
+         if (instr->alu.xmov)
+            printf("(xmov%d)", instr->alu.xmov);
+
+         /* special case mnemonics:
+          *   reading $00 seems to always yield zero, and so:
+          *      or $dst, $00, $src -> mov $dst, $src
+          *   Maybe add one for negate too, ie.
+          *      sub $dst, $00, $src ???
+          */
+         if ((instr->alu.alu == OPC_OR) && !instr->alu.src1) {
+            printf("mov ");
+            src1 = false;
+         } else {
+            print_alu_name(instr->alu.alu, instrs[i]);
+         }
+
+         print_dst(instr->alu.dst);
+         if (src1) {
+            printf(", ");
+            print_src(instr->alu.src1);
+         }
+         printf(", ");
+         print_src(instr->alu.src2);
+
+         /* print out unexpected bits: */
+         if (verbose) {
+            if (instr->alu.pad)
+               printerr("  (pad=%01x)", instr->alu.pad);
+            if (instr->alu.src1 && !src1)
+               printerr("  (src1=%02x)", instr->alu.src1);
+         }
+
+         /* xmov is a modifier that makes the processor execute up to 3
+          * extra mov's after the current instruction. Given an ALU
+          * instruction:
+          *
+          * (xmovN) alu $dst, $src1, $src2
+          *
+          * In all of the uses in the firmware blob, $dst and $src2 are one
+          * of the "special" registers $data, $addr, $addr2. I've observed
+          * that if $dst isn't "special" then it's replaced with $00
+          * instead of $data, but I haven't checked what happens if $src2
+          * isn't "special".  Anyway, in the usual case, the HW produces a
+          * count M = min(N, $rem) and then does the following:
+          *
+          * M = 1:
+          * mov $data, $src2
+          *
+          * M = 2:
+          * mov $data, $src2
+          * mov $data, $src2
+          *
+          * M = 3:
+          * mov $data, $src2
+          * mov $dst, $src2 (special case for CP_CONTEXT_REG_BUNCH)
+          * mov $data, $src2
+          *
+          * It seems to be frequently used in combination with (rep) to
+          * provide a kind of hardware-based loop unrolling, and there's
+          * even a special case in the ISA to be able to do this with
+          * CP_CONTEXT_REG_BUNCH. However (rep) isn't required.
+          *
+          * This dumps the expected extra instructions, assuming that $rem
+          * isn't too small.
+          */
+         if (verbose && instr->alu.xmov) {
+            for (int i = 0; i < instr->alu.xmov; i++) {
+               printf("\n        ; mov ");
+               if (instr->alu.dst < 0x1d)
+                  printf("$00");
+               else if (instr->alu.xmov == 3 && i == 1)
+                  print_dst(instr->alu.dst);
+               else
+                  printf("$data");
+               printf(", ");
+               print_src(instr->alu.src2);
+            }
+         }
+
+         break;
+      }
+      case OPC_CWRITE6:
+      case OPC_CREAD6:
+      case OPC_STORE6:
+      case OPC_LOAD6: {
+         if (rep)
+            printf("(rep)");
+
+         bool is_control_reg = true;
+         if (gpuver >= 6) {
+            switch (opc) {
+            case OPC_CWRITE6:
+               printf("cwrite ");
+               break;
+            case OPC_CREAD6:
+               printf("cread ");
+               break;
+            case OPC_STORE6:
+               is_control_reg = false;
+               printf("store ");
+               break;
+            case OPC_LOAD6:
+               is_control_reg = false;
+               printf("load ");
+               break;
+            default:
+               assert(!"unreachable");
+            }
+         } else {
+            switch (opc) {
+            case OPC_CWRITE5:
+               printf("cwrite ");
+               break;
+            case OPC_CREAD5:
+               printf("cread ");
+               break;
+            default:
+               fprintf(stderr, "A6xx control opcode on A5xx?\n");
+               exit(1);
+            }
+         }
+
+         print_src(instr->control.src1);
+         printf(", [");
+         print_src(instr->control.src2);
+         printf(" + ");
+         if (is_control_reg && instr->control.flags != 0x4)
+            print_control_reg(instr->control.uimm);
+         else
+            printf("0x%03x", instr->control.uimm);
+         printf("], 0x%x", instr->control.flags);
+         break;
+      }
+      case OPC_BRNEI:
+      case OPC_BREQI:
+      case OPC_BRNEB:
+      case OPC_BREQB: {
+         unsigned off = i + instr->br.ioff;
+
+         assert(!rep);
+
+         /* Since $00 reads back zero, it can be used as src for
+          * unconditional branches.  (This only really makes sense
+          * for the BREQB.. or possible BRNEI if imm==0.)
+          *
+          * If bit=0 then branch is taken if *all* bits are zero.
+          * Otherwise it is taken if bit (bit-1) is clear.
+          *
+          * Note the instruction after a jump/branch is executed
+          * regardless of whether branch is taken, so use nop or
+          * take that into account in code.
+          */
+         if (instr->br.src || (opc != OPC_BRNEB)) {
+            bool immed = false;
+
+            if (opc == OPC_BRNEI) {
+               printf("brne ");
+               immed = true;
+            } else if (opc == OPC_BREQI) {
+               printf("breq ");
+               immed = true;
+            } else if (opc == OPC_BRNEB) {
+               printf("brne ");
+            } else if (opc == OPC_BREQB) {
+               printf("breq ");
+            }
+            print_src(instr->br.src);
+            if (immed) {
+               printf(", 0x%x,", instr->br.bit_or_imm);
+            } else {
+               printf(", b%u,", instr->br.bit_or_imm);
+            }
+         } else {
+            printf("jump");
+            if (verbose && instr->br.bit_or_imm) {
+               printerr("  (src=%03x, bit=%03x) ", instr->br.src,
+                        instr->br.bit_or_imm);
+            }
+         }
+
+         printf(" #");
+         printlbl("%s", label_name(off, true));
+         if (verbose)
+            printf(" (#%d, %04x)", instr->br.ioff, off);
+         break;
+      }
+      case OPC_CALL:
+         assert(!rep);
+         printf("call #");
+         printlbl("%s", fxn_name(instr->call.uoff));
+         if (verbose) {
+            printf(" (%04x)", instr->call.uoff);
+            if (instr->br.bit_or_imm || instr->br.src) {
+               printerr("  (src=%03x, bit=%03x) ", instr->br.src,
+                        instr->br.bit_or_imm);
+            }
+         }
+         break;
+      case OPC_RET:
+         assert(!rep);
+         if (instr->ret.pad)
+            printf("[%08x]  ; ", instrs[i]);
+         if (instr->ret.interrupt)
+            printf("iret");
+         else
+            printf("ret");
+         break;
+      case OPC_WIN:
+         assert(!rep);
+         if (instr->waitin.pad)
+            printf("[%08x]  ; ", instrs[i]);
+         printf("waitin");
+         if (verbose && instr->waitin.pad)
+            printerr("  (pad=%x)", instr->waitin.pad);
+         break;
+      case OPC_PREEMPTLEAVE6:
+         if (gpuver < 6) {
+            printf("[%08x]  ; op38", instrs[i]);
+         } else {
+            printf("preemptleave #");
+            printlbl("%s", label_name(instr->call.uoff, true));
+         }
+         break;
+      case OPC_SETSECURE:
+         /* Note: This seems to implicitly read the secure/not-secure state
+          * to set from the low bit of $02, and implicitly jumps to pc + 3
+          * (i.e. skipping the next two instructions) if it succeeds. We
+          * print these implicit parameters to make reading the disassembly
+          * easier.
+          */
+         if (instr->pad)
+            printf("[%08x]  ; ", instrs[i]);
+         printf("setsecure $02, #");
+         printlbl("%s", label_name(i + 3, true));
+         break;
+      default:
+         printerr("[%08x]", instrs[i]);
+         printf("  ; op%02x ", opc);
+         print_dst(instr->alui.dst);
+         printf(", ");
+         print_src(instr->alui.src);
+         print_gpu_reg(instrs[i] & 0xffff);
+         break;
+      }
+      printf("\n");
+   }
+
+   /* print jumptable: */
+   if (verbose) {
+      printf(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n");
+      printf("; JUMP TABLE\n");
+      for (i = 0; i < 0x7f; i++) {
+         int n = i; // + CP_NOP;
+         uint32_t offset = jmptbl[i];
+         const char *name = getpm4(n);
+         printf("%3d %02x: ", n, n);
+         printf("%04x", offset);
+         if (name) {
+            printf("   ; %s", name);
+         } else {
+            printf("   ; UNKN%d", n);
+         }
+         printf("\n");
+      }
+   }
 }
 
-static void usage(void)
+/* Print the command-line synopsis (the -g, -c and -v options plus the
+ * input file name) to stderr, then terminate the process with exit
+ * status 2.  Never returns to the caller.
+ */
+static void
+usage(void)
 {
-       fprintf(stderr, "Usage:\n"
-                       "\tdisasm [-g GPUVER] [-v] [-c] filename.asm\n"
-                       "\t\t-g - specify GPU version (5, etc)\n"
-                       "\t\t-c - use colors\n"
-                       "\t\t-v - verbose output\n"
-               );
-       exit(2);
+   fprintf(stderr, "Usage:\n"
+                   "\tdisasm [-g GPUVER] [-v] [-c] filename.asm\n"
+                   "\t\t-g - specify GPU version (5, etc)\n"
+                   "\t\t-c - use colors\n"
+                   "\t\t-v - verbose output\n");
+   exit(2);
 }
 
-int main(int argc, char **argv)
+/* Program entry point.
+ *
+ * Parses the -g/-v/-c options, settles on a GPU generation (from -g or,
+ * failing that, from the file name), loads the rnn register database,
+ * then reads the firmware file and passes it to disasm().
+ *
+ * Returns 0 after disassembling.  Bad options, a missing file argument
+ * or an unsupported GPU version all go through usage(), which exits with
+ * status 2; a register-database error is reported via errx() with
+ * db->estatus as the exit status.
+ */
+int
+main(int argc, char **argv)
 {
-       uint32_t *buf;
-       char *file, *control_reg_name;
-       bool colors = false;
-       size_t sz;
-       int c;
-
-       /* Argument parsing: */
-       while ((c = getopt (argc, argv, "g:vc")) != -1) {
-               switch (c) {
-                       case 'g':
-                               gpuver = atoi(optarg);
-                               break;
-                       case 'v':
-                               verbose = true;
-                               break;
-                       case 'c':
-                               colors = true;
-                               break;
-                       default:
-                               usage();
-               }
-       }
-
-       if (optind >= argc) {
-               fprintf(stderr, "no file specified!\n");
-               usage();
-       }
-
-       file = argv[optind];
-
-       /* if gpu version not specified, infer from filename: */
-       if (!gpuver) {
-               if (strstr(file, "a5")) {
-                       gpuver = 5;
-               } else if (strstr(file, "a6")) {
-                       gpuver = 6;
-               }
-       }
-
-       switch (gpuver) {
-       case 6:
-               printf("; a6xx microcode\n");
-               variant = "A6XX";
-               control_reg_name = "A6XX_CONTROL_REG";
-               break;
-       case 5:
-               printf("; a5xx microcode\n");
-               variant = "A5XX";
-               control_reg_name = "A5XX_CONTROL_REG";
-               break;
-       default:
-               fprintf(stderr, "unknown GPU version!\n");
-               usage();
-       }
-
-       rnn_init();
-       db = rnn_newdb();
-
-       ctx = rnndec_newcontext(db);
-       ctx->colors = colors ? &envy_def_colors : &envy_null_colors;
-
-       rnn_parsefile(db, "adreno.xml");
-       rnn_prepdb(db);
-       if (db->estatus)
-               errx(db->estatus, "failed to parse register database");
-       dom[0] = rnn_finddomain(db, variant);
-       dom[1] = rnn_finddomain(db, "AXXX");
-       control_regs = rnn_finddomain(db, control_reg_name);
-
-       rnndec_varadd(ctx, "chip", variant);
-
-       buf = (uint32_t *)os_read_file(file, &sz);
-
-       printf("; Disassembling microcode: %s\n", file);
-       printf("; Version: %08x\n\n", buf[1]);
-       disasm(&buf[1], sz/4 - 1);
-
-       return 0;
+   uint32_t *buf;
+   char *file, *control_reg_name;
+   bool colors = false;
+   size_t sz;
+   int c;
+
+   /* Argument parsing: */
+   while ((c = getopt(argc, argv, "g:vc")) != -1) {
+      switch (c) {
+      case 'g':
+         gpuver = atoi(optarg);
+         break;
+      case 'v':
+         verbose = true;
+         break;
+      case 'c':
+         colors = true;
+         break;
+      default:
+         /* unrecognized option: usage() exits, so no break is needed */
+         usage();
+      }
+   }
+
+   /* the firmware file is the first non-option argument */
+   if (optind >= argc) {
+      fprintf(stderr, "no file specified!\n");
+      usage();
+   }
+
+   file = argv[optind];
+
+   /* if gpu version not specified, infer from filename: */
+   if (!gpuver) {
+      if (strstr(file, "a5")) {
+         gpuver = 5;
+      } else if (strstr(file, "a6")) {
+         gpuver = 6;
+      }
+   }
+
+   /* Select the register-database domain names for this generation.
+    * The default case never falls out of the switch (usage() exits), so
+    * variant and control_reg_name are always set past this point.
+    */
+   switch (gpuver) {
+   case 6:
+      printf("; a6xx microcode\n");
+      variant = "A6XX";
+      control_reg_name = "A6XX_CONTROL_REG";
+      break;
+   case 5:
+      printf("; a5xx microcode\n");
+      variant = "A5XX";
+      control_reg_name = "A5XX_CONTROL_REG";
+      break;
+   default:
+      fprintf(stderr, "unknown GPU version!\n");
+      usage();
+   }
+
+   rnn_init();
+   db = rnn_newdb();
+
+   ctx = rnndec_newcontext(db);
+   ctx->colors = colors ? &envy_def_colors : &envy_null_colors;
+
+   rnn_parsefile(db, "adreno.xml");
+   rnn_prepdb(db);
+   if (db->estatus)
+      errx(db->estatus, "failed to parse register database");
+   dom[0] = rnn_finddomain(db, variant);
+   dom[1] = rnn_finddomain(db, "AXXX");
+   control_regs = rnn_finddomain(db, control_reg_name);
+
+   rnndec_varadd(ctx, "chip", variant);
+
+   /* NOTE(review): buf is dereferenced below without checking that
+    * os_read_file() succeeded or that the file holds at least two dwords
+    * -- confirm how os_read_file() reports failure.
+    */
+   buf = (uint32_t *)os_read_file(file, &sz);
+
+   printf("; Disassembling microcode: %s\n", file);
+   printf("; Version: %08x\n\n", buf[1]);
+   /* Skip the first dword; sz is presumably the file size in bytes (TODO
+    * confirm), making sz / 4 - 1 the number of dwords handed to disasm().
+    */
+   disasm(&buf[1], sz / 4 - 1);
+
+   return 0;
 }