* (ie. top two bits are '11' are encoded as 6 bits. See get_opc()
*/
typedef enum {
- OPC_NOP = 0x00,
-
- OPC_ADD = 0x01, /* add immediate */
- OPC_ADDHI = 0x02, /* add immediate (hi 32b of 64b) */
- OPC_SUB = 0x03, /* subtract immediate */
- OPC_SUBHI = 0x04, /* subtract immediate (hi 32b of 64b) */
- OPC_AND = 0x05, /* AND immediate */
- OPC_OR = 0x06, /* OR immediate */
- OPC_XOR = 0x07, /* XOR immediate */
- OPC_NOT = 0x08, /* bitwise not of immed (src1 ignored) */
- OPC_SHL = 0x09, /* shift-left immediate */
- OPC_USHR = 0x0a, /* unsigned shift right by immediate */
- OPC_ISHR = 0x0b, /* signed shift right by immediate */
- OPC_ROT = 0x0c, /* rotate left (left shift with wrap-around) */
- OPC_MUL8 = 0x0d, /* 8bit multiply by immediate */
- OPC_MIN = 0x0e,
- OPC_MAX = 0x0f,
- OPC_CMP = 0x10, /* compare src to immed */
- OPC_MOVI = 0x11, /* move immediate */
-
- /* Return the most-significant bit of src2, or 0 if src2 == 0 (the
- * same as if src2 == 1). src1 is ignored. Note that this overlaps
- * with STORE6, so it can only be used with the two-source encoding.
- */
- OPC_MSB = 0x14,
-
-
- OPC_ALU = 0x13, /* ALU instruction with two src registers */
-
- /* These seem something to do with setting some external state..
- * doesn't seem to map *directly* to registers, but I guess that
- * is where things end up. For example, this sequence in the
- * CP_INDIRECT_BUFFER handler:
- *
- * mov $02, $data ; low 32b of IB target address
- * mov $03, $data ; high 32b of IB target
- * mov $04, $data ; IB size in dwords
- * breq $04, 0x0, #l23 (#69, 04a2)
- * and $05, $18, 0x0003
- * shl $05, $05, 0x0002
- * cwrite $02, [$05 + 0x0b0], 0x8
- * cwrite $03, [$05 + 0x0b1], 0x8
- * cwrite $04, [$05 + 0x0b2], 0x8
- *
- * Note that CP_IB1/2_BASE_LO/HI/BUFSZ in 0x0b1f->0xb21 (IB1) and
- * 0x0b22->0x0b24 (IB2). Presumably $05 ends up w/ different value
- * for RB->IB1 vs IB1->IB2.
- */
- OPC_CWRITE5 = 0x15,
- OPC_CREAD5 = 0x16,
-
- /* A6xx shuffled around the cwrite/cread opcodes and added new opcodes
- * that let you read/write directly to memory (and bypass the IOMMU?).
- */
- OPC_STORE6 = 0x14,
- OPC_CWRITE6 = 0x15,
- OPC_LOAD6 = 0x16,
- OPC_CREAD6 = 0x17,
-
- OPC_BRNEI = 0x30, /* relative branch (if $src != immed) */
- OPC_BREQI = 0x31, /* relative branch (if $src == immed) */
- OPC_BRNEB = 0x32, /* relative branch (if bit not set) */
- OPC_BREQB = 0x33, /* relative branch (if bit is set) */
- OPC_RET = 0x34, /* return */
- OPC_CALL = 0x35, /* "function" call */
- OPC_WIN = 0x36, /* wait for input (ie. wait for WPTR to advance) */
- OPC_PREEMPTLEAVE6 = 0x38, /* try to leave preemption */
- OPC_SETSECURE = 0x3b, /* switch secure mode on/off */
+ OPC_NOP = 0x00,
+
+ OPC_ADD = 0x01, /* add immediate */
+ OPC_ADDHI = 0x02, /* add immediate (hi 32b of 64b) */
+ OPC_SUB = 0x03, /* subtract immediate */
+ OPC_SUBHI = 0x04, /* subtract immediate (hi 32b of 64b) */
+ OPC_AND = 0x05, /* AND immediate */
+ OPC_OR = 0x06, /* OR immediate */
+ OPC_XOR = 0x07, /* XOR immediate */
+ OPC_NOT = 0x08, /* bitwise not of immed (src1 ignored) */
+ OPC_SHL = 0x09, /* shift-left immediate */
+ OPC_USHR = 0x0a, /* unsigned shift right by immediate */
+ OPC_ISHR = 0x0b, /* signed shift right by immediate */
+ OPC_ROT = 0x0c, /* rotate left (left shift with wrap-around) */
+ OPC_MUL8 = 0x0d, /* 8bit multiply by immediate */
+ OPC_MIN = 0x0e,
+ OPC_MAX = 0x0f,
+ OPC_CMP = 0x10, /* compare src to immed */
+ OPC_MOVI = 0x11, /* move immediate */
+
+ /* Return the most-significant bit of src2, or 0 if src2 == 0 (the
+ * same as if src2 == 1). src1 is ignored. Note that this overlaps
+ * with STORE6, so it can only be used with the two-source encoding.
+ */
+ OPC_MSB = 0x14,
+
+ OPC_ALU = 0x13, /* ALU instruction with two src registers */
+
+ /* These seem something to do with setting some external state..
+ * doesn't seem to map *directly* to registers, but I guess that
+ * is where things end up. For example, this sequence in the
+ * CP_INDIRECT_BUFFER handler:
+ *
+ * mov $02, $data ; low 32b of IB target address
+ * mov $03, $data ; high 32b of IB target
+ * mov $04, $data ; IB size in dwords
+ * breq $04, 0x0, #l23 (#69, 04a2)
+ * and $05, $18, 0x0003
+ * shl $05, $05, 0x0002
+ * cwrite $02, [$05 + 0x0b0], 0x8
+ * cwrite $03, [$05 + 0x0b1], 0x8
+ * cwrite $04, [$05 + 0x0b2], 0x8
+ *
+ * Note that CP_IB1/2_BASE_LO/HI/BUFSZ in 0x0b1f->0xb21 (IB1) and
+ * 0x0b22->0x0b24 (IB2). Presumably $05 ends up w/ different value
+ * for RB->IB1 vs IB1->IB2.
+ */
+ OPC_CWRITE5 = 0x15,
+ OPC_CREAD5 = 0x16,
+
+ /* A6xx shuffled around the cwrite/cread opcodes and added new opcodes
+ * that let you read/write directly to memory (and bypass the IOMMU?).
+ */
+ OPC_STORE6 = 0x14, /* same numeric value as OPC_MSB */
+ OPC_CWRITE6 = 0x15, /* same numeric value as OPC_CWRITE5 */
+ OPC_LOAD6 = 0x16, /* same numeric value as OPC_CREAD5 */
+ OPC_CREAD6 = 0x17,
+
+ OPC_BRNEI = 0x30, /* relative branch (if $src != immed) */
+ OPC_BREQI = 0x31, /* relative branch (if $src == immed) */
+ OPC_BRNEB = 0x32, /* relative branch (if bit not set) */
+ OPC_BREQB = 0x33, /* relative branch (if bit is set) */
+ OPC_RET = 0x34, /* return */
+ OPC_CALL = 0x35, /* "function" call */
+ OPC_WIN = 0x36, /* wait for input (ie. wait for WPTR to advance) */
+ OPC_PREEMPTLEAVE6 = 0x38, /* try to leave preemption */
+ OPC_SETSECURE = 0x3b, /* switch secure mode on/off */
} afuc_opc;
-
typedef union PACKED {
- /* addi, subi, andi, ori, xori, etc: */
- struct PACKED {
- uint32_t uimm : 16;
- uint32_t dst : 5;
- uint32_t src : 5;
- uint32_t hdr : 6;
- } alui;
- struct PACKED {
- uint32_t uimm : 16;
- uint32_t dst : 5;
- uint32_t shift : 5;
- uint32_t hdr : 6;
- } movi;
- struct PACKED {
- uint32_t alu : 5;
- uint32_t pad : 4;
- uint32_t xmov : 2; /* execute eXtra mov's based on $rem */
- uint32_t dst : 5;
- uint32_t src2 : 5;
- uint32_t src1 : 5;
- uint32_t hdr : 6;
- } alu;
- struct PACKED {
- uint32_t uimm : 12;
- uint32_t flags : 4;
- uint32_t src1 : 5; /* dst (cread) or src (cwrite) register */
- uint32_t src2 : 5; /* read or write address is src2+uimm */
- uint32_t hdr : 6;
- } control;
- struct PACKED {
- int32_t ioff : 16; /* relative offset */
- uint32_t bit_or_imm : 5;
- uint32_t src : 5;
- uint32_t hdr : 6;
- } br;
- struct PACKED {
- uint32_t uoff : 26; /* absolute (unsigned) offset */
- uint32_t hdr : 6;
- } call;
- struct PACKED {
- uint32_t pad : 25;
- uint32_t interrupt : 1; /* return from ctxt-switch interrupt handler */
- uint32_t hdr : 6;
- } ret;
- struct PACKED {
- uint32_t pad : 26;
- uint32_t hdr : 6;
- } waitin;
- struct PACKED {
- uint32_t pad : 26;
- uint32_t opc_r : 6;
- };
+ /* addi, subi, andi, ori, xori, etc: */
+ struct PACKED {
+ uint32_t uimm : 16;
+ uint32_t dst : 5;
+ uint32_t src : 5;
+ uint32_t hdr : 6;
+ } alui;
+ struct PACKED { /* move-immediate: 16-bit immediate plus a shift field */
+ uint32_t uimm : 16;
+ uint32_t dst : 5;
+ uint32_t shift : 5;
+ uint32_t hdr : 6;
+ } movi;
+ struct PACKED { /* two-source ALU form; the operation is selected by 'alu' */
+ uint32_t alu : 5;
+ uint32_t pad : 4;
+ uint32_t xmov : 2; /* execute eXtra mov's based on $rem */
+ uint32_t dst : 5;
+ uint32_t src2 : 5;
+ uint32_t src1 : 5;
+ uint32_t hdr : 6;
+ } alu;
+ struct PACKED { /* cread/cwrite form (the assembler also uses it for load/store) */
+ uint32_t uimm : 12;
+ uint32_t flags : 4;
+ uint32_t src1 : 5; /* dst (cread) or src (cwrite) register */
+ uint32_t src2 : 5; /* read or write address is src2+uimm */
+ uint32_t hdr : 6;
+ } control;
+ struct PACKED { /* relative branch form */
+ int32_t ioff : 16; /* relative offset */
+ uint32_t bit_or_imm : 5;
+ uint32_t src : 5;
+ uint32_t hdr : 6;
+ } br;
+ struct PACKED { /* absolute-target form */
+ uint32_t uoff : 26; /* absolute (unsigned) offset */
+ uint32_t hdr : 6;
+ } call;
+ struct PACKED {
+ uint32_t pad : 25;
+ uint32_t interrupt : 1; /* return from ctxt-switch interrupt handler */
+ uint32_t hdr : 6;
+ } ret;
+ struct PACKED {
+ uint32_t pad : 26;
+ uint32_t hdr : 6;
+ } waitin;
+ struct PACKED { /* anonymous view used by afuc_get_opc()/afuc_set_opc() */
+ uint32_t pad : 26;
+ uint32_t opc_r : 6; /* opcode (+ rep flag); follows 26 bits like every variant's hdr */
+ };
} afuc_instr;
static inline void
afuc_get_opc(afuc_instr *ai, afuc_opc *opc, bool *rep)
{
- if (ai->opc_r < 0x30) {
- *opc = ai->opc_r >> 1;
- *rep = ai->opc_r & 0x1;
- } else {
- *opc = ai->opc_r;
- *rep = false;
- }
+ if (ai->opc_r < 0x30) { /* decode *opc and *rep from ai->opc_r; short form first */
+ *opc = ai->opc_r >> 1; /* opcode sits above the low bit */
+ *rep = ai->opc_r & 0x1; /* low bit is the (rep) flag */
+ } else { /* field >= 0x30: all 6 bits are the opcode */
+ *opc = ai->opc_r;
+ *rep = false; /* this form has no rep bit */
+ }
}
static inline void
afuc_set_opc(afuc_instr *ai, afuc_opc opc, bool rep)
{
- if (opc < 0x30) {
- ai->opc_r = opc << 1;
- ai->opc_r |= !!rep;
- } else {
- ai->opc_r = opc;
- }
+ if (opc < 0x30) { /* encode opc (and rep) into ai->opc_r; inverse of afuc_get_opc() */
+ ai->opc_r = opc << 1; /* leave the low bit free for the rep flag */
+ ai->opc_r |= !!rep; /* !! normalizes rep to 0 or 1 */
+ } else { /* opcodes >= 0x30 fill the whole field; rep is ignored */
+ ai->opc_r = opc;
+ }
}
#endif /* _AFUC_H_ */
* SOFTWARE.
*/
+#include <assert.h>
#include <err.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <unistd.h>
#include <fcntl.h>
+#include <getopt.h>
#include <stdarg.h>
-#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
-#include <assert.h>
-#include <getopt.h>
+#include <unistd.h>
#include "util/macros.h"
#include "afuc.h"
+#include "asm.h"
+#include "parser.h"
#include "rnn.h"
#include "rnndec.h"
-#include "parser.h"
-#include "asm.h"
int gpuver;
-
static struct rnndeccontext *ctx;
static struct rnndb *db;
static struct rnndomain *control_regs;
struct rnndomain *dom[2];
-
/* bit lame to hard-code max but fw sizes are small */
static struct asm_instruction instructions[0x2000];
static unsigned num_instructions;
static struct asm_label labels[0x512];
static unsigned num_labels;
-struct asm_instruction *next_instr(int tok)
+/* Claim the next free slot of instructions[] for token 'tok' and return it.
+ * Exits with status 2 if the fixed-size table is already full.
+ */
+struct asm_instruction *
+next_instr(int tok)
{
- struct asm_instruction *ai = &instructions[num_instructions++];
- assert(num_instructions < ARRAY_SIZE(instructions));
- ai->tok = tok;
- return ai;
+ struct asm_instruction *ai;
+
+ /* Check capacity before indexing, and do not rely on assert() for it:
+ * assert() compiles away under NDEBUG, which would turn an oversized
+ * input into an out-of-bounds write.
+ */
+ if (num_instructions >= ARRAY_SIZE(instructions)) {
+ fprintf(stderr, "too many instructions (max %u)\n",
+ (unsigned)ARRAY_SIZE(instructions));
+ exit(2);
+ }
+
+ ai = &instructions[num_instructions++];
+ ai->tok = tok;
+ return ai;
}
-void decl_label(const char *str)
+/* Record label 'str' as pointing at the next instruction to be emitted.
+ * The string is stored by pointer, not copied.  Exits with status 2 if the
+ * fixed-size label table is already full.
+ */
+void
+decl_label(const char *str)
{
- struct asm_label *label = &labels[num_labels++];
+ struct asm_label *label;
- assert(num_labels < ARRAY_SIZE(labels));
+ /* Check capacity before indexing; assert() would vanish under NDEBUG. */
+ if (num_labels >= ARRAY_SIZE(labels)) {
+ fprintf(stderr, "too many labels (max %u)\n",
+ (unsigned)ARRAY_SIZE(labels));
+ exit(2);
+ }
- label->offset = num_instructions;
- label->label = str;
+ label = &labels[num_labels++];
+ label->offset = num_instructions;
+ label->label = str;
}
-static int resolve_label(const char *str)
+static int
+resolve_label(const char *str) /* return the instruction offset recorded for label 'str' */
{
- int i;
+ int i;
- for (i = 0; i < num_labels; i++) {
- struct asm_label *label = &labels[i];
+ for (i = 0; i < num_labels; i++) { /* linear scan over all declared labels */
+ struct asm_label *label = &labels[i];
- if (!strcmp(str, label->label)) {
- return label->offset;
- }
- }
+ if (!strcmp(str, label->label)) { /* first exact name match wins */
+ return label->offset;
+ }
+ }
- fprintf(stderr, "Undeclared label: %s\n", str);
- exit(2);
+ fprintf(stderr, "Undeclared label: %s\n", str);
+ exit(2); /* no match is fatal, so this function never returns a failure value */
}
-static afuc_opc tok2alu(int tok)
+static afuc_opc
+tok2alu(int tok) /* map an ALU mnemonic token to the matching afuc_opc value */
{
- switch (tok) {
- case T_OP_ADD: return OPC_ADD;
- case T_OP_ADDHI: return OPC_ADDHI;
- case T_OP_SUB: return OPC_SUB;
- case T_OP_SUBHI: return OPC_SUBHI;
- case T_OP_AND: return OPC_AND;
- case T_OP_OR: return OPC_OR;
- case T_OP_XOR: return OPC_XOR;
- case T_OP_NOT: return OPC_NOT;
- case T_OP_SHL: return OPC_SHL;
- case T_OP_USHR: return OPC_USHR;
- case T_OP_ISHR: return OPC_ISHR;
- case T_OP_ROT: return OPC_ROT;
- case T_OP_MUL8: return OPC_MUL8;
- case T_OP_MIN: return OPC_MIN;
- case T_OP_MAX: return OPC_MAX;
- case T_OP_CMP: return OPC_CMP;
- case T_OP_MSB: return OPC_MSB;
- default:
- assert(0);
- return -1;
- }
+ switch (tok) {
+ case T_OP_ADD:
+ return OPC_ADD;
+ case T_OP_ADDHI:
+ return OPC_ADDHI;
+ case T_OP_SUB:
+ return OPC_SUB;
+ case T_OP_SUBHI:
+ return OPC_SUBHI;
+ case T_OP_AND:
+ return OPC_AND;
+ case T_OP_OR:
+ return OPC_OR;
+ case T_OP_XOR:
+ return OPC_XOR;
+ case T_OP_NOT:
+ return OPC_NOT;
+ case T_OP_SHL:
+ return OPC_SHL;
+ case T_OP_USHR:
+ return OPC_USHR;
+ case T_OP_ISHR:
+ return OPC_ISHR;
+ case T_OP_ROT:
+ return OPC_ROT;
+ case T_OP_MUL8:
+ return OPC_MUL8;
+ case T_OP_MIN:
+ return OPC_MIN;
+ case T_OP_MAX:
+ return OPC_MAX;
+ case T_OP_CMP:
+ return OPC_CMP;
+ case T_OP_MSB:
+ return OPC_MSB;
+ default: /* any other token is a caller bug */
+ assert(0);
+ return -1; /* only reached when assert() is compiled out */
+ }
}
-static void emit_instructions(int outfd)
+static void
+emit_instructions(int outfd) /* encode instructions[] and write them to outfd, one dword each */
{
- int i;
-
- /* there is an extra 0x00000000 which kernel strips off.. we could
- * perhaps use it for versioning.
- */
- i = 0;
- write(outfd, &i, 4);
-
- for (i = 0; i < num_instructions; i++) {
- struct asm_instruction *ai = &instructions[i];
- afuc_instr instr = {0};
- afuc_opc opc;
-
- /* special case, 2nd dword is patched up w/ # of instructions
- * (ie. offset of jmptbl)
- */
- if (i == 1) {
- assert(ai->is_literal);
- ai->literal &= ~0xffff;
- ai->literal |= num_instructions;
- }
-
- if (ai->is_literal) {
- write(outfd, &ai->literal, 4);
- continue;
- }
-
- switch (ai->tok) {
- case T_OP_NOP:
- opc = OPC_NOP;
- if (gpuver >= 6)
- instr.pad = 0x1000000;
- break;
- case T_OP_ADD:
- case T_OP_ADDHI:
- case T_OP_SUB:
- case T_OP_SUBHI:
- case T_OP_AND:
- case T_OP_OR:
- case T_OP_XOR:
- case T_OP_NOT:
- case T_OP_SHL:
- case T_OP_USHR:
- case T_OP_ISHR:
- case T_OP_ROT:
- case T_OP_MUL8:
- case T_OP_MIN:
- case T_OP_MAX:
- case T_OP_CMP:
- case T_OP_MSB:
- if (ai->has_immed) {
- /* MSB overlaps with STORE */
- assert(ai->tok != T_OP_MSB);
- if (ai->xmov) {
- fprintf(stderr, "ALU instruction cannot have immediate and xmov\n");
- exit(1);
- }
- opc = tok2alu(ai->tok);
- instr.alui.dst = ai->dst;
- instr.alui.src = ai->src1;
- instr.alui.uimm = ai->immed;
- } else {
- opc = OPC_ALU;
- instr.alu.dst = ai->dst;
- instr.alu.src1 = ai->src1;
- instr.alu.src2 = ai->src2;
- instr.alu.xmov = ai->xmov;
- instr.alu.alu = tok2alu(ai->tok);
- }
- break;
- case T_OP_MOV:
- /* move can either be encoded as movi (ie. move w/ immed) or
- * an alu instruction
- */
- if ((ai->has_immed || ai->label) && ai->xmov) {
- fprintf(stderr, "ALU instruction cannot have immediate and xmov\n");
- exit(1);
- }
- if (ai->has_immed) {
- opc = OPC_MOVI;
- instr.movi.dst = ai->dst;
- instr.movi.uimm = ai->immed;
- instr.movi.shift = ai->shift;
- } else if (ai->label) {
- /* mov w/ a label is just an alias for an immediate, this
- * is useful to load the address of a constant table into
- * a register:
- */
- opc = OPC_MOVI;
- instr.movi.dst = ai->dst;
- instr.movi.uimm = resolve_label(ai->label);
- instr.movi.shift = ai->shift;
- } else {
- /* encode as: or $dst, $00, $src */
- opc = OPC_ALU;
- instr.alu.dst = ai->dst;
- instr.alu.src1 = 0x00; /* $00 reads-back 0 */
- instr.alu.src2 = ai->src1;
- instr.alu.xmov = ai->xmov;
- instr.alu.alu = OPC_OR;
- }
- break;
- case T_OP_CWRITE:
- case T_OP_CREAD:
- case T_OP_STORE:
- case T_OP_LOAD:
- if (gpuver >= 6) {
- if (ai->tok == T_OP_CWRITE) {
- opc = OPC_CWRITE6;
- } else if (ai->tok == T_OP_CREAD) {
- opc = OPC_CREAD6;
- } else if (ai->tok == T_OP_STORE) {
- opc = OPC_STORE6;
- } else if (ai->tok == T_OP_LOAD) {
- opc = OPC_LOAD6;
- }
- } else {
- if (ai->tok == T_OP_CWRITE) {
- opc = OPC_CWRITE5;
- } else if (ai->tok == T_OP_CREAD) {
- opc = OPC_CREAD5;
- } else if (ai->tok == T_OP_STORE ||
- ai->tok == T_OP_LOAD) {
- fprintf(stderr, "load and store do not exist on a5xx\n");
- exit(1);
- }
- }
- instr.control.src1 = ai->src1;
- instr.control.src2 = ai->src2;
- instr.control.flags = ai->bit;
- instr.control.uimm = ai->immed;
- break;
- case T_OP_BRNE:
- case T_OP_BREQ:
- if (ai->has_immed) {
- opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEI : OPC_BREQI;
- instr.br.bit_or_imm = ai->immed;
- } else {
- opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEB : OPC_BREQB;
- instr.br.bit_or_imm = ai->bit;
- }
- instr.br.src = ai->src1;
- instr.br.ioff = resolve_label(ai->label) - i;
- break;
- case T_OP_RET:
- opc = OPC_RET;
- break;
- case T_OP_IRET:
- opc = OPC_RET;
- instr.ret.interrupt = 1;
- break;
- case T_OP_CALL:
- opc = OPC_CALL;
- instr.call.uoff = resolve_label(ai->label);
- break;
- case T_OP_PREEMPTLEAVE:
- opc = OPC_PREEMPTLEAVE6;
- instr.call.uoff = resolve_label(ai->label);
- break;
- case T_OP_SETSECURE:
- opc = OPC_SETSECURE;
- if (resolve_label(ai->label) != i + 3) {
- fprintf(stderr, "jump label %s is incorrect for setsecure\n", ai->label);
- exit(1);
- }
- if (ai->src1 != 0x2) {
- fprintf(stderr, "source for setsecure must be $02\n");
- exit(1);
- }
- break;
- case T_OP_JUMP:
- /* encode jump as: brne $00, b0, #label */
- opc = OPC_BRNEB;
- instr.br.bit_or_imm = 0;
- instr.br.src = 0x00; /* $00 reads-back 0.. compare to 0 */
- instr.br.ioff = resolve_label(ai->label) - i;
- break;
- case T_OP_WAITIN:
- opc = OPC_WIN;
- break;
- default:
- unreachable("");
- }
-
- afuc_set_opc(&instr, opc, ai->rep);
-
- write(outfd, &instr, 4);
- }
-
+ int i;
+
+ /* there is an extra 0x00000000 which kernel strips off.. we could
+ * perhaps use it for versioning.
+ */
+ i = 0;
+ write(outfd, &i, 4); /* NOTE(review): write() result is not checked */
+
+ for (i = 0; i < num_instructions; i++) {
+ struct asm_instruction *ai = &instructions[i];
+ afuc_instr instr = {0}; /* start from an all-zero encoding */
+ afuc_opc opc;
+
+ /* special case, 2nd dword is patched up w/ # of instructions
+ * (ie. offset of jmptbl)
+ */
+ if (i == 1) {
+ assert(ai->is_literal);
+ ai->literal &= ~0xffff; /* low 16 bits are replaced by the count */
+ ai->literal |= num_instructions;
+ }
+
+ if (ai->is_literal) { /* literals are written as-is, without encoding */
+ write(outfd, &ai->literal, 4); /* NOTE(review): write() result is not checked */
+ continue;
+ }
+
+ switch (ai->tok) {
+ case T_OP_NOP:
+ opc = OPC_NOP;
+ if (gpuver >= 6)
+ instr.pad = 0x1000000; /* a6xx+ nop sets bit 24; reason not visible here */
+ break;
+ case T_OP_ADD:
+ case T_OP_ADDHI:
+ case T_OP_SUB:
+ case T_OP_SUBHI:
+ case T_OP_AND:
+ case T_OP_OR:
+ case T_OP_XOR:
+ case T_OP_NOT:
+ case T_OP_SHL:
+ case T_OP_USHR:
+ case T_OP_ISHR:
+ case T_OP_ROT:
+ case T_OP_MUL8:
+ case T_OP_MIN:
+ case T_OP_MAX:
+ case T_OP_CMP:
+ case T_OP_MSB:
+ if (ai->has_immed) { /* immediate form: the opcode itself names the operation */
+ /* MSB overlaps with STORE */
+ assert(ai->tok != T_OP_MSB);
+ if (ai->xmov) {
+ fprintf(stderr,
+ "ALU instruction cannot have immediate and xmov\n");
+ exit(1);
+ }
+ opc = tok2alu(ai->tok);
+ instr.alui.dst = ai->dst;
+ instr.alui.src = ai->src1;
+ instr.alui.uimm = ai->immed;
+ } else { /* two-register form: OPC_ALU plus the operation in the alu field */
+ opc = OPC_ALU;
+ instr.alu.dst = ai->dst;
+ instr.alu.src1 = ai->src1;
+ instr.alu.src2 = ai->src2;
+ instr.alu.xmov = ai->xmov;
+ instr.alu.alu = tok2alu(ai->tok);
+ }
+ break;
+ case T_OP_MOV:
+ /* move can either be encoded as movi (ie. move w/ immed) or
+ * an alu instruction
+ */
+ if ((ai->has_immed || ai->label) && ai->xmov) {
+ fprintf(stderr, "ALU instruction cannot have immediate and xmov\n");
+ exit(1);
+ }
+ if (ai->has_immed) {
+ opc = OPC_MOVI;
+ instr.movi.dst = ai->dst;
+ instr.movi.uimm = ai->immed;
+ instr.movi.shift = ai->shift;
+ } else if (ai->label) {
+ /* mov w/ a label is just an alias for an immediate, this
+ * is useful to load the address of a constant table into
+ * a register:
+ */
+ opc = OPC_MOVI;
+ instr.movi.dst = ai->dst;
+ instr.movi.uimm = resolve_label(ai->label);
+ instr.movi.shift = ai->shift;
+ } else {
+ /* encode as: or $dst, $00, $src */
+ opc = OPC_ALU;
+ instr.alu.dst = ai->dst;
+ instr.alu.src1 = 0x00; /* $00 reads-back 0 */
+ instr.alu.src2 = ai->src1;
+ instr.alu.xmov = ai->xmov;
+ instr.alu.alu = OPC_OR;
+ }
+ break;
+ case T_OP_CWRITE:
+ case T_OP_CREAD:
+ case T_OP_STORE:
+ case T_OP_LOAD:
+ if (gpuver >= 6) { /* a6xx+ opcode numbering */
+ if (ai->tok == T_OP_CWRITE) {
+ opc = OPC_CWRITE6;
+ } else if (ai->tok == T_OP_CREAD) {
+ opc = OPC_CREAD6;
+ } else if (ai->tok == T_OP_STORE) {
+ opc = OPC_STORE6;
+ } else if (ai->tok == T_OP_LOAD) {
+ opc = OPC_LOAD6;
+ }
+ } else { /* older numbering: only cwrite/cread exist */
+ if (ai->tok == T_OP_CWRITE) {
+ opc = OPC_CWRITE5;
+ } else if (ai->tok == T_OP_CREAD) {
+ opc = OPC_CREAD5;
+ } else if (ai->tok == T_OP_STORE || ai->tok == T_OP_LOAD) {
+ fprintf(stderr, "load and store do not exist on a5xx\n");
+ exit(1);
+ }
+ }
+ instr.control.src1 = ai->src1;
+ instr.control.src2 = ai->src2;
+ instr.control.flags = ai->bit;
+ instr.control.uimm = ai->immed;
+ break;
+ case T_OP_BRNE:
+ case T_OP_BREQ:
+ if (ai->has_immed) { /* compare against an immediate */
+ opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEI : OPC_BREQI;
+ instr.br.bit_or_imm = ai->immed;
+ } else { /* test a single bit */
+ opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEB : OPC_BREQB;
+ instr.br.bit_or_imm = ai->bit;
+ }
+ instr.br.src = ai->src1;
+ instr.br.ioff = resolve_label(ai->label) - i; /* offset relative to this instruction */
+ break;
+ case T_OP_RET:
+ opc = OPC_RET;
+ break;
+ case T_OP_IRET: /* same opcode as ret, with the interrupt bit set */
+ opc = OPC_RET;
+ instr.ret.interrupt = 1;
+ break;
+ case T_OP_CALL:
+ opc = OPC_CALL;
+ instr.call.uoff = resolve_label(ai->label); /* absolute target */
+ break;
+ case T_OP_PREEMPTLEAVE:
+ opc = OPC_PREEMPTLEAVE6;
+ instr.call.uoff = resolve_label(ai->label); /* reuses the call layout */
+ break;
+ case T_OP_SETSECURE:
+ opc = OPC_SETSECURE;
+ if (resolve_label(ai->label) != i + 3) { /* label must be exactly 3 instructions ahead */
+ fprintf(stderr, "jump label %s is incorrect for setsecure\n",
+ ai->label);
+ exit(1);
+ }
+ if (ai->src1 != 0x2) {
+ fprintf(stderr, "source for setsecure must be $02\n");
+ exit(1);
+ }
+ break;
+ case T_OP_JUMP:
+ /* encode jump as: brne $00, b0, #label */
+ opc = OPC_BRNEB;
+ instr.br.bit_or_imm = 0;
+ instr.br.src = 0x00; /* $00 reads-back 0.. compare to 0 */
+ instr.br.ioff = resolve_label(ai->label) - i;
+ break;
+ case T_OP_WAITIN:
+ opc = OPC_WIN;
+ break;
+ default:
+ unreachable("");
+ }
+
+ afuc_set_opc(&instr, opc, ai->rep); /* fill in the opcode/rep header bits last */
+
+ write(outfd, &instr, 4); /* NOTE(review): write() result is not checked */
+ }
}
-static int find_enum_val(struct rnnenum *en, const char *name)
+static int
+find_enum_val(struct rnnenum *en, const char *name) /* value of enum entry 'name', or -1 */
{
- int i;
+ int i;
- for (i = 0; i < en->valsnum; i++)
- if (en->vals[i]->valvalid && !strcmp(name, en->vals[i]->name))
- return en->vals[i]->value;
+ for (i = 0; i < en->valsnum; i++) /* en is dereferenced unconditionally */
+ if (en->vals[i]->valvalid && !strcmp(name, en->vals[i]->name)) /* skip entries without a valid value */
+ return en->vals[i]->value;
- return -1;
+ return -1; /* no entry with that name */
}
-static int find_reg(struct rnndomain *dom, const char *name)
+static int
+find_reg(struct rnndomain *dom, const char *name) /* offset of element 'name' in dom, or -1 */
{
- int i;
+ int i;
- for (i = 0; i < dom->subelemsnum; i++)
- if (!strcmp(name, dom->subelems[i]->name))
- return dom->subelems[i]->offset;
+ for (i = 0; i < dom->subelemsnum; i++) /* only direct sub-elements are searched */
+ if (!strcmp(name, dom->subelems[i]->name))
+ return dom->subelems[i]->offset;
- return -1;
+ return -1; /* not found */
}
-unsigned parse_control_reg(const char *name)
+/* Translate a control-register reference such as "@NAME" into its offset in
+ * the control_regs domain.  Exits with status 2 if the name is unknown.
+ */
+unsigned
+parse_control_reg(const char *name)
{
- /* skip leading "@" */
- int val = find_reg(control_regs, name + 1);
- if (val < 0) {
- printf("invalid control reg: %s\n", name);
- exit(2);
- }
- return (unsigned)val;
+ /* skip leading "@" */
+ int val = find_reg(control_regs, name + 1);
+ if (val < 0) {
+ /* diagnostics go to stderr, like every other error in this tool */
+ fprintf(stderr, "invalid control reg: %s\n", name);
+ exit(2);
+ }
+ return (unsigned)val;
}
-static void emit_jumptable(int outfd)
+/* Build the packet jump table and append it to outfd.  Each label whose name
+ * is a known PM4 type-3 packet, or has the form UNKN<id>, stores its
+ * instruction offset at index <id>; all other labels are ignored.
+ */
+static void
+emit_jumptable(int outfd)
{
- struct rnnenum *en = rnn_findenum(ctx->db, "adreno_pm4_type3_packets");
- uint32_t jmptable[0x80] = {0};
- int i;
-
- for (i = 0; i < num_labels; i++) {
- struct asm_label *label = &labels[i];
- int id = find_enum_val(en, label->label);
-
- /* if it doesn't match a known PM4 packet-id, try to match UNKN%d: */
- if (id < 0) {
- if (sscanf(label->label, "UNKN%d", &id) != 1) {
- /* if still not found, must not belong in jump-table: */
- continue;
- }
- }
-
- jmptable[id] = label->offset;
- }
-
- write(outfd, jmptable, sizeof(jmptable));
+ struct rnnenum *en = rnn_findenum(ctx->db, "adreno_pm4_type3_packets");
+ uint32_t jmptable[0x80] = {0};
+ int i;
+
+ /* find_enum_val() dereferences en, so a missing enum must be fatal */
+ if (!en) {
+ fprintf(stderr, "could not find enum adreno_pm4_type3_packets\n");
+ exit(2);
+ }
+
+ for (i = 0; i < num_labels; i++) {
+ struct asm_label *label = &labels[i];
+ int id = find_enum_val(en, label->label);
+
+ /* if it doesn't match a known PM4 packet-id, try to match UNKN%d: */
+ if (id < 0) {
+ if (sscanf(label->label, "UNKN%d", &id) != 1) {
+ /* if still not found, must not belong in jump-table: */
+ continue;
+ }
+ }
+
+ /* The id comes from the input file (UNKN%d) or the register database,
+ * so range-check it before using it as an index into the stack array.
+ */
+ if (id < 0 || id >= (int)ARRAY_SIZE(jmptable)) {
+ fprintf(stderr, "packet id %d of label %s is out of jump table range\n",
+ id, label->label);
+ exit(2);
+ }
+
+ jmptable[id] = label->offset;
+ }
+
+ /* a failed or short write would silently produce truncated firmware */
+ if (write(outfd, jmptable, sizeof(jmptable)) != (ssize_t)sizeof(jmptable)) {
+ fprintf(stderr, "failed to write jump table\n");
+ exit(1);
+ }
}
-static void usage(void)
+static void
+usage(void) /* print command-line help to stderr and exit with status 2 */
{
- fprintf(stderr, "Usage:\n"
- "\tasm [-g GPUVER] filename.asm filename.fw\n"
- "\t\t-g - specify GPU version (5, etc)\n"
- );
- exit(2);
+ fprintf(stderr, "Usage:\n"
+ "\tasm [-g GPUVER] filename.asm filename.fw\n"
+ "\t\t-g - specify GPU version (5, etc)\n");
+ exit(2); /* never returns to the caller */
}
-int main(int argc, char **argv)
+/* Assembler entry point: asm [-g GPUVER] filename.asm filename.fw
+ *
+ * Parses the input with yyparse(), then writes the encoded instructions
+ * followed by the packet jump table to the output file.  Returns 0 on
+ * success or the yyparse() result on a parse error; most other failures
+ * exit through usage().
+ */
+int
+main(int argc, char **argv)
{
- FILE *in;
- char *file, *outfile, *name, *control_reg_name;
- int c, ret, outfd;
-
- /* Argument parsing: */
- while ((c = getopt (argc, argv, "g:")) != -1) {
- switch (c) {
- case 'g':
- gpuver = atoi(optarg);
- break;
- default:
- usage();
- }
- }
-
- if (optind >= (argc + 1)) {
- fprintf(stderr, "no file specified!\n");
- usage();
- }
-
- file = argv[optind];
- outfile = argv[optind + 1];
-
- outfd = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
- if (outfd < 0) {
- fprintf(stderr, "could not open \"%s\"\n", outfile);
- usage();
- }
-
- in = fopen(file, "r");
- if (!in) {
- fprintf(stderr, "could not open \"%s\"\n", file);
- usage();
- }
-
- yyset_in(in);
-
- /* if gpu version not specified, infer from filename: */
- if (!gpuver) {
- if (strstr(file, "a5")) {
- gpuver = 5;
- } else if (strstr(file, "a6")) {
- gpuver = 6;
- }
- }
-
- switch (gpuver) {
- case 6:
- name = "A6XX";
- control_reg_name = "A6XX_CONTROL_REG";
- break;
- case 5:
- name = "A5XX";
- control_reg_name = "A5XX_CONTROL_REG";
- break;
- default:
- fprintf(stderr, "unknown GPU version!\n");
- usage();
- }
-
- rnn_init();
- db = rnn_newdb();
-
- ctx = rnndec_newcontext(db);
-
- rnn_parsefile(db, "adreno.xml");
- rnn_prepdb(db);
- if (db->estatus)
- errx(db->estatus, "failed to parse register database");
- dom[0] = rnn_finddomain(db, name);
- dom[1] = rnn_finddomain(db, "AXXX");
- control_regs = rnn_finddomain(db, control_reg_name);
-
- ret = yyparse();
- if (ret) {
- fprintf(stderr, "parse failed: %d\n", ret);
- return ret;
- }
-
- emit_instructions(outfd);
- emit_jumptable(outfd);
-
- close(outfd);
-
- return 0;
+ FILE *in;
+ char *file, *outfile, *name, *control_reg_name;
+ int c, ret, outfd;
+
+ /* Argument parsing: */
+ while ((c = getopt(argc, argv, "g:")) != -1) {
+ switch (c) {
+ case 'g':
+ gpuver = atoi(optarg);
+ break;
+ default:
+ usage();
+ }
+ }
+
+ /* Both an input and an output filename are required: argv[optind] and
+ * argv[optind + 1] are read below, so two positional arguments must
+ * remain after option parsing.
+ */
+ if (optind + 2 > argc) {
+ fprintf(stderr, "no file specified!\n");
+ usage();
+ }
+
+ file = argv[optind];
+ outfile = argv[optind + 1];
+
+ outfd = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ if (outfd < 0) {
+ fprintf(stderr, "could not open \"%s\"\n", outfile);
+ usage();
+ }
+
+ in = fopen(file, "r");
+ if (!in) {
+ fprintf(stderr, "could not open \"%s\"\n", file);
+ usage();
+ }
+
+ yyset_in(in);
+
+ /* if gpu version not specified, infer from filename: */
+ if (!gpuver) {
+ if (strstr(file, "a5")) {
+ gpuver = 5;
+ } else if (strstr(file, "a6")) {
+ gpuver = 6;
+ }
+ }
+
+ /* pick the register-database domains that match the GPU generation */
+ switch (gpuver) {
+ case 6:
+ name = "A6XX";
+ control_reg_name = "A6XX_CONTROL_REG";
+ break;
+ case 5:
+ name = "A5XX";
+ control_reg_name = "A5XX_CONTROL_REG";
+ break;
+ default:
+ fprintf(stderr, "unknown GPU version!\n");
+ usage();
+ }
+
+ rnn_init();
+ db = rnn_newdb();
+
+ ctx = rnndec_newcontext(db);
+
+ rnn_parsefile(db, "adreno.xml");
+ rnn_prepdb(db);
+ if (db->estatus)
+ errx(db->estatus, "failed to parse register database");
+ dom[0] = rnn_finddomain(db, name);
+ dom[1] = rnn_finddomain(db, "AXXX");
+ control_regs = rnn_finddomain(db, control_reg_name);
+
+ /* the parser fills instructions[] and labels[] through next_instr() and
+ * decl_label()
+ */
+ ret = yyparse();
+ if (ret) {
+ fprintf(stderr, "parse failed: %d\n", ret);
+ return ret;
+ }
+
+ emit_instructions(outfd);
+ emit_jumptable(outfd);
+
+ close(outfd);
+
+ return 0;
}
* SOFTWARE.
*/
+#include <assert.h>
#include <err.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <unistd.h>
#include <fcntl.h>
+#include <getopt.h>
#include <stdarg.h>
-#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
-#include <assert.h>
-#include <getopt.h>
+#include <unistd.h>
#include "util/os_file.h"
static int gpuver;
-
static struct rnndeccontext *ctx;
static struct rnndb *db;
static struct rnndomain *control_regs;
*/
static bool verbose = false;
-static void print_gpu_reg(uint32_t regbase)
+static void
+print_gpu_reg(uint32_t regbase) /* print "\t; <name>" if regbase decodes to a known register */
{
- struct rnndomain *d = NULL;
-
- if (regbase < 0x100)
- return;
-
- if (rnndec_checkaddr(ctx, dom[0], regbase, 0))
- d = dom[0];
- else if (rnndec_checkaddr(ctx, dom[1], regbase, 0))
- d = dom[1];
-
- if (d) {
- struct rnndecaddrinfo *info = rnndec_decodeaddr(ctx, d, regbase, 0);
- if (info) {
- printf("\t; %s", info->name);
- free(info->name);
- free(info);
- return;
- }
- }
+ struct rnndomain *d = NULL;
+
+ if (regbase < 0x100) /* values below 0x100 are never annotated */
+ return;
+
+ if (rnndec_checkaddr(ctx, dom[0], regbase, 0)) /* dom[0] takes precedence over dom[1] */
+ d = dom[0];
+ else if (rnndec_checkaddr(ctx, dom[1], regbase, 0))
+ d = dom[1];
+
+ if (d) {
+ struct rnndecaddrinfo *info = rnndec_decodeaddr(ctx, d, regbase, 0);
+ if (info) {
+ printf("\t; %s", info->name);
+ free(info->name); /* both the name and the info struct are released here */
+ free(info);
+ return;
+ }
+ }
}
-static void printc(const char *c, const char *fmt, ...)
+static void
+printc(const char *c, const char *fmt, ...) /* printf() wrapped in color string c and a reset */
{
- va_list args;
- printf("%s", c);
- va_start(args, fmt);
- vprintf(fmt, args);
- va_end(args);
- printf("%s", ctx->colors->reset);
+ va_list args;
+ printf("%s", c); /* emit the caller-supplied color prefix */
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+ printf("%s", ctx->colors->reset); /* always end with the context's reset string */
}
#define printerr(fmt, ...) printc(ctx->colors->err, fmt, ##__VA_ARGS__)
#define printlbl(fmt, ...) printc(ctx->colors->btarg, fmt, ##__VA_ARGS__)
-static void print_reg(unsigned reg)
+static void
+print_reg(unsigned reg) /* print a register by symbolic name if it has one, else as $NN hex */
{
-// XXX seems like *reading* $00 --> literal zero??
-// seems like read from $1c gives packet remaining len??
-// $01 current packet header, writing to $01 triggers
-// parsing header and jumping to appropriate handler.
- if (reg == 0x1c)
- printf("$rem"); /* remainding dwords in packet */
- else if (reg == 0x1d)
- printf("$addr");
- else if (reg == 0x1e)
- printf("$addr2"); // XXX
- else if (reg == 0x1f)
- printf("$data");
- else
- printf("$%02x", reg);
+ // XXX seems like *reading* $00 --> literal zero??
+ // seems like read from $1c gives packet remaining len??
+ // $01 current packet header, writing to $01 triggers
+ // parsing header and jumping to appropriate handler.
+ if (reg == 0x1c)
+ printf("$rem"); /* remaining dwords in packet */
+ else if (reg == 0x1d)
+ printf("$addr");
+ else if (reg == 0x1e)
+ printf("$addr2"); // XXX
+ else if (reg == 0x1f)
+ printf("$data");
+ else
+ printf("$%02x", reg); /* all other registers print as two hex digits */
}
-static void print_src(unsigned reg)
+static void
+print_src(unsigned reg) /* print a source register operand */
{
- print_reg(reg);
+ print_reg(reg); /* currently formatted exactly like any other register */
}
-static void print_dst(unsigned reg)
+static void
+print_dst(unsigned reg) /* print a destination register operand */
{
- print_reg(reg);
+ print_reg(reg); /* currently formatted exactly like any other register */
}
-static void print_alu_name(afuc_opc opc, uint32_t instr)
+static void
+print_alu_name(afuc_opc opc, uint32_t instr) /* print the mnemonic for ALU opcode opc */
{
- if (opc == OPC_ADD) {
- printf("add ");
- } else if (opc == OPC_ADDHI) {
- printf("addhi ");
- } else if (opc == OPC_SUB) {
- printf("sub ");
- } else if (opc == OPC_SUBHI) {
- printf("subhi ");
- } else if (opc == OPC_AND) {
- printf("and ");
- } else if (opc == OPC_OR) {
- printf("or ");
- } else if (opc == OPC_XOR) {
- printf("xor ");
- } else if (opc == OPC_NOT) {
- printf("not ");
- } else if (opc == OPC_SHL) {
- printf("shl ");
- } else if (opc == OPC_USHR) {
- printf("ushr ");
- } else if (opc == OPC_ISHR) {
- printf("ishr ");
- } else if (opc == OPC_ROT) {
- printf("rot ");
- } else if (opc == OPC_MUL8) {
- printf("mul8 ");
- } else if (opc == OPC_MIN) {
- printf("min ");
- } else if (opc == OPC_MAX) {
- printf("max ");
- } else if (opc == OPC_CMP) {
- printf("cmp ");
- } else if (opc == OPC_MSB) {
- printf("msb ");
- } else {
- printerr("[%08x]", instr);
- printf(" ; alu%02x ", opc);
- }
+ if (opc == OPC_ADD) {
+ printf("add ");
+ } else if (opc == OPC_ADDHI) {
+ printf("addhi ");
+ } else if (opc == OPC_SUB) {
+ printf("sub ");
+ } else if (opc == OPC_SUBHI) {
+ printf("subhi ");
+ } else if (opc == OPC_AND) {
+ printf("and ");
+ } else if (opc == OPC_OR) {
+ printf("or ");
+ } else if (opc == OPC_XOR) {
+ printf("xor ");
+ } else if (opc == OPC_NOT) {
+ printf("not ");
+ } else if (opc == OPC_SHL) {
+ printf("shl ");
+ } else if (opc == OPC_USHR) {
+ printf("ushr ");
+ } else if (opc == OPC_ISHR) {
+ printf("ishr ");
+ } else if (opc == OPC_ROT) {
+ printf("rot ");
+ } else if (opc == OPC_MUL8) {
+ printf("mul8 ");
+ } else if (opc == OPC_MIN) {
+ printf("min ");
+ } else if (opc == OPC_MAX) {
+ printf("max ");
+ } else if (opc == OPC_CMP) {
+ printf("cmp ");
+ } else if (opc == OPC_MSB) {
+ printf("msb ");
+ } else { /* unknown opcode: show the raw word in the error color plus the opcode number */
+ printerr("[%08x]", instr);
+ printf(" ; alu%02x ", opc);
+ }
}
-static const char *getpm4(uint32_t id)
+static const char *
+getpm4(uint32_t id) /* name of PM4 type-3 packet 'id', as decoded by rnndec_decode_enum() */
{
- return rnndec_decode_enum(ctx, "adreno_pm4_type3_packets", id);
+ return rnndec_decode_enum(ctx, "adreno_pm4_type3_packets", id); /* result for unknown ids depends on that helper */
}
static inline unsigned
_odd_parity_bit(unsigned val)
{
- /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
- * note that we want odd parity so 0x6996 is inverted.
- */
- val ^= val >> 16;
- val ^= val >> 8;
- val ^= val >> 4;
- val &= 0xf;
- return (~0x6996 >> val) & 1;
+ /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
+ * note that we want odd parity so 0x6996 is inverted.
+ */
+ val ^= val >> 16;
+ val ^= val >> 8;
+ val ^= val >> 4;
+ val &= 0xf;
+ return (~0x6996 >> val) & 1;
}
static struct {
- uint32_t offset;
- uint32_t num_jump_labels;
- uint32_t jump_labels[256];
+ uint32_t offset;
+ uint32_t num_jump_labels;
+ uint32_t jump_labels[256];
} jump_labels[1024];
int num_jump_labels;
-static void add_jump_table_entry(uint32_t n, uint32_t offset)
+static void
+add_jump_table_entry(uint32_t n, uint32_t offset)
{
- int i;
+ int i;
- if (n > 128) /* can't possibly be a PM4 PKT3.. */
- return;
+ if (n > 128) /* can't possibly be a PM4 PKT3.. */
+ return;
- for (i = 0; i < num_jump_labels; i++)
- if (jump_labels[i].offset == offset)
- goto add_label;
+ for (i = 0; i < num_jump_labels; i++)
+ if (jump_labels[i].offset == offset)
+ goto add_label;
- num_jump_labels = i + 1;
- jump_labels[i].offset = offset;
- jump_labels[i].num_jump_labels = 0;
+ num_jump_labels = i + 1;
+ jump_labels[i].offset = offset;
+ jump_labels[i].num_jump_labels = 0;
add_label:
- jump_labels[i].jump_labels[jump_labels[i].num_jump_labels++] = n;
- assert(jump_labels[i].num_jump_labels < 256);
+ jump_labels[i].jump_labels[jump_labels[i].num_jump_labels++] = n;
+ assert(jump_labels[i].num_jump_labels < 256);
}
-static int get_jump_table_entry(uint32_t offset)
+static int
+get_jump_table_entry(uint32_t offset)
{
- int i;
+ int i;
- for (i = 0; i < num_jump_labels; i++)
- if (jump_labels[i].offset == offset)
- return i;
+ for (i = 0; i < num_jump_labels; i++)
+ if (jump_labels[i].offset == offset)
+ return i;
- return -1;
+ return -1;
}
static uint32_t label_offsets[0x512];
static int num_label_offsets;
-static int label_idx(uint32_t offset, bool create)
+static int
+label_idx(uint32_t offset, bool create)
{
- int i;
- for (i = 0; i < num_label_offsets; i++)
- if (offset == label_offsets[i])
- return i;
- if (!create)
- return -1;
- label_offsets[i] = offset;
- num_label_offsets = i+1;
- return i;
+ int i;
+ for (i = 0; i < num_label_offsets; i++)
+ if (offset == label_offsets[i])
+ return i;
+ if (!create)
+ return -1;
+ label_offsets[i] = offset;
+ num_label_offsets = i + 1;
+ return i;
}
static const char *
label_name(uint32_t offset, bool allow_jt)
{
- static char name[12];
- int lidx;
-
- if (allow_jt) {
- lidx = get_jump_table_entry(offset);
- if (lidx >= 0) {
- int j;
- for (j = 0; j < jump_labels[lidx].num_jump_labels; j++) {
- uint32_t jump_label = jump_labels[lidx].jump_labels[j];
- const char *str = getpm4(jump_label);
- if (str)
- return str;
- }
- // if we don't find anything w/ known name, maybe we should
- // return UNKN%d to at least make it clear that this is some
- // sort of jump-table entry?
- }
- }
-
- lidx = label_idx(offset, false);
- if (lidx < 0)
- return NULL;
- sprintf(name, "l%03d", lidx);
- return name;
+ static char name[12];
+ int lidx;
+
+ if (allow_jt) {
+ lidx = get_jump_table_entry(offset);
+ if (lidx >= 0) {
+ int j;
+ for (j = 0; j < jump_labels[lidx].num_jump_labels; j++) {
+ uint32_t jump_label = jump_labels[lidx].jump_labels[j];
+ const char *str = getpm4(jump_label);
+ if (str)
+ return str;
+ }
+ // if we don't find anything w/ known name, maybe we should
+ // return UNKN%d to at least make it clear that this is some
+ // sort of jump-table entry?
+ }
+ }
+
+ lidx = label_idx(offset, false);
+ if (lidx < 0)
+ return NULL;
+ sprintf(name, "l%03d", lidx);
+ return name;
}
-
static uint32_t fxn_offsets[0x512];
static int num_fxn_offsets;
-static int fxn_idx(uint32_t offset, bool create)
+static int
+fxn_idx(uint32_t offset, bool create)
{
- int i;
- for (i = 0; i < num_fxn_offsets; i++)
- if (offset == fxn_offsets[i])
- return i;
- if (!create)
- return -1;
- fxn_offsets[i] = offset;
- num_fxn_offsets = i+1;
- return i;
+ int i;
+ for (i = 0; i < num_fxn_offsets; i++)
+ if (offset == fxn_offsets[i])
+ return i;
+ if (!create)
+ return -1;
+ fxn_offsets[i] = offset;
+ num_fxn_offsets = i + 1;
+ return i;
}
static const char *
fxn_name(uint32_t offset)
{
- static char name[14];
- int fidx = fxn_idx(offset, false);
- if (fidx < 0)
- return NULL;
- sprintf(name, "fxn%02d", fidx);
- return name;
+ static char name[14];
+ int fidx = fxn_idx(offset, false);
+ if (fidx < 0)
+ return NULL;
+ sprintf(name, "fxn%02d", fidx);
+ return name;
}
-static void print_control_reg(uint32_t id)
+static void
+print_control_reg(uint32_t id)
{
- if (rnndec_checkaddr(ctx, control_regs, id, 0)) {
- struct rnndecaddrinfo *info = rnndec_decodeaddr(ctx, control_regs, id, 0);
- printf("@%s", info->name);
- free(info->name);
- free(info);
- } else {
- printf("0x%03x", id);
- }
+ if (rnndec_checkaddr(ctx, control_regs, id, 0)) {
+ struct rnndecaddrinfo *info = rnndec_decodeaddr(ctx, control_regs, id, 0);
+ printf("@%s", info->name);
+ free(info->name);
+ free(info);
+ } else {
+ printf("0x%03x", id);
+ }
}
-static void disasm(uint32_t *buf, int sizedwords)
+static void
+disasm(uint32_t *buf, int sizedwords)
{
- uint32_t *instrs = buf;
- const int jmptbl_start = instrs[1] & 0xffff;
- uint32_t *jmptbl = &buf[jmptbl_start];
- afuc_opc opc;
- bool rep;
- int i;
-
-
- /* parse jumptable: */
- for (i = 0; i < 0x80; i++) {
- unsigned offset = jmptbl[i];
- unsigned n = i;// + CP_NOP;
- add_jump_table_entry(n, offset);
- }
-
- /* do a pre-pass to find instructions that are potential branch targets,
- * and add labels for them:
- */
- for (i = 0; i < jmptbl_start; i++) {
- afuc_instr *instr = (void *)&instrs[i];
-
- afuc_get_opc(instr, &opc, &rep);
-
- switch (opc) {
- case OPC_BRNEI:
- case OPC_BREQI:
- case OPC_BRNEB:
- case OPC_BREQB:
- label_idx(i + instr->br.ioff, true);
- break;
- case OPC_PREEMPTLEAVE6:
- if (gpuver >= 6)
- label_idx(instr->call.uoff, true);
- break;
- case OPC_CALL:
- fxn_idx(instr->call.uoff, true);
- break;
- case OPC_SETSECURE:
- /* this implicitly jumps to pc + 3 if successful */
- label_idx(i + 3, true);
- break;
- default:
- break;
- }
- }
-
- /* print instructions: */
- for (i = 0; i < jmptbl_start; i++) {
- int jump_label_idx;
- afuc_instr *instr = (void *)&instrs[i];
- const char *fname, *lname;
- afuc_opc opc;
- bool rep;
-
- afuc_get_opc(instr, &opc, &rep);
-
- lname = label_name(i, false);
- fname = fxn_name(i);
- jump_label_idx = get_jump_table_entry(i);
-
- if (jump_label_idx >= 0) {
- int j;
- printf("\n");
- for (j = 0; j < jump_labels[jump_label_idx].num_jump_labels; j++) {
- uint32_t jump_label = jump_labels[jump_label_idx].jump_labels[j];
- const char *name = getpm4(jump_label);
- if (name) {
- printlbl("%s", name);
- } else {
- printlbl("UNKN%d", jump_label);
- }
- printf(":\n");
- }
- }
-
- if (fname) {
- printlbl("%s", fname);
- printf(":\n");
- }
-
- if (lname) {
- printlbl(" %s", lname);
- printf(":");
- } else {
- printf(" ");
- }
-
-
- if (verbose) {
- printf("\t%04x: %08x ", i, instrs[i]);
- } else {
- printf(" ");
- }
-
- switch (opc) {
- case OPC_NOP: {
- /* a6xx changed the default immediate, and apparently 0
- * is illegal now.
- */
- const uint32_t nop = gpuver >= 6 ? 0x1000000 : 0x0;
- if (instrs[i] != nop) {
- printerr("[%08x]", instrs[i]);
- printf(" ; ");
- }
- if (rep)
- printf("(rep)");
- printf("nop");
- print_gpu_reg(instrs[i]);
-
- break;
- }
- case OPC_ADD:
- case OPC_ADDHI:
- case OPC_SUB:
- case OPC_SUBHI:
- case OPC_AND:
- case OPC_OR:
- case OPC_XOR:
- case OPC_NOT:
- case OPC_SHL:
- case OPC_USHR:
- case OPC_ISHR:
- case OPC_ROT:
- case OPC_MUL8:
- case OPC_MIN:
- case OPC_MAX:
- case OPC_CMP: {
- bool src1 = true;
-
- if (opc == OPC_NOT)
- src1 = false;
-
- if (rep)
- printf("(rep)");
-
- print_alu_name(opc, instrs[i]);
- print_dst(instr->alui.dst);
- printf(", ");
- if (src1) {
- print_src(instr->alui.src);
- printf(", ");
- }
- printf("0x%04x", instr->alui.uimm);
- print_gpu_reg(instr->alui.uimm);
-
- /* print out unexpected bits: */
- if (verbose) {
- if (instr->alui.src && !src1)
- printerr(" (src=%02x)", instr->alui.src);
- }
-
- break;
- }
- case OPC_MOVI: {
- if (rep)
- printf("(rep)");
- printf("mov ");
- print_dst(instr->movi.dst);
- printf(", 0x%04x", instr->movi.uimm);
- if (instr->movi.shift)
- printf(" << %u", instr->movi.shift);
-
- /* using mov w/ << 16 is popular way to construct a pkt7
- * header to send (for ex, from PFP to ME), so check that
- * case first
- */
- if ((instr->movi.shift == 16) &&
- ((instr->movi.uimm & 0xff00) == 0x7000)) {
- unsigned opc, p;
-
- opc = instr->movi.uimm & 0x7f;
- p = _odd_parity_bit(opc);
-
- /* So, you'd think that checking the parity bit would be
- * a good way to rule out false positives, but seems like
- * ME doesn't really care.. at least it would filter out
- * things that look like actual legit packets between
- * PFP and ME..
- */
- if (1 || p == ((instr->movi.uimm >> 7) & 0x1)) {
- const char *name = getpm4(opc);
- printf("\t; ");
- if (name)
- printlbl("%s", name);
- else
- printlbl("UNKN%u", opc);
- break;
- }
- }
-
- print_gpu_reg(instr->movi.uimm << instr->movi.shift);
-
- break;
- }
- case OPC_ALU: {
- bool src1 = true;
-
- if (instr->alu.alu == OPC_NOT || instr->alu.alu == OPC_MSB)
- src1 = false;
-
- if (instr->alu.pad)
- printf("[%08x] ; ", instrs[i]);
-
- if (rep)
- printf("(rep)");
- if (instr->alu.xmov)
- printf("(xmov%d)", instr->alu.xmov);
-
- /* special case mnemonics:
- * reading $00 seems to always yield zero, and so:
- * or $dst, $00, $src -> mov $dst, $src
- * Maybe add one for negate too, ie.
- * sub $dst, $00, $src ???
- */
- if ((instr->alu.alu == OPC_OR) && !instr->alu.src1) {
- printf("mov ");
- src1 = false;
- } else {
- print_alu_name(instr->alu.alu, instrs[i]);
- }
-
- print_dst(instr->alu.dst);
- if (src1) {
- printf(", ");
- print_src(instr->alu.src1);
- }
- printf(", ");
- print_src(instr->alu.src2);
-
- /* print out unexpected bits: */
- if (verbose) {
- if (instr->alu.pad)
- printerr(" (pad=%01x)", instr->alu.pad);
- if (instr->alu.src1 && !src1)
- printerr(" (src1=%02x)", instr->alu.src1);
- }
-
- /* xmov is a modifier that makes the processor execute up to 3
- * extra mov's after the current instruction. Given an ALU
- * instruction:
- *
- * (xmovN) alu $dst, $src1, $src2
- *
- * In all of the uses in the firmware blob, $dst and $src2 are one
- * of the "special" registers $data, $addr, $addr2. I've observed
- * that if $dst isn't "special" then it's replaced with $00
- * instead of $data, but I haven't checked what happens if $src2
- * isn't "special". Anyway, in the usual case, the HW produces a
- * count M = min(N, $rem) and then does the following:
- *
- * M = 1:
- * mov $data, $src2
- *
- * M = 2:
- * mov $data, $src2
- * mov $data, $src2
- *
- * M = 3:
- * mov $data, $src2
- * mov $dst, $src2 (special case for CP_CONTEXT_REG_BUNCH)
- * mov $data, $src2
- *
- * It seems to be frequently used in combination with (rep) to
- * provide a kind of hardware-based loop unrolling, and there's
- * even a special case in the ISA to be able to do this with
- * CP_CONTEXT_REG_BUNCH. However (rep) isn't required.
- *
- * This dumps the expected extra instructions, assuming that $rem
- * isn't too small.
- */
- if (verbose && instr->alu.xmov) {
- for (int i = 0; i < instr->alu.xmov; i++) {
- printf("\n ; mov ");
- if (instr->alu.dst < 0x1d)
- printf("$00");
- else if (instr->alu.xmov == 3 && i == 1)
- print_dst(instr->alu.dst);
- else
- printf("$data");
- printf(", ");
- print_src(instr->alu.src2);
- }
- }
-
- break;
- }
- case OPC_CWRITE6:
- case OPC_CREAD6:
- case OPC_STORE6:
- case OPC_LOAD6: {
- if (rep)
- printf("(rep)");
-
- bool is_control_reg = true;
- if (gpuver >= 6) {
- switch (opc) {
- case OPC_CWRITE6:
- printf("cwrite ");
- break;
- case OPC_CREAD6:
- printf("cread ");
- break;
- case OPC_STORE6:
- is_control_reg = false;
- printf("store ");
- break;
- case OPC_LOAD6:
- is_control_reg = false;
- printf("load ");
- break;
- default:
- assert(!"unreachable");
- }
- } else {
- switch (opc) {
- case OPC_CWRITE5:
- printf("cwrite ");
- break;
- case OPC_CREAD5:
- printf("cread ");
- break;
- default:
- fprintf(stderr, "A6xx control opcode on A5xx?\n");
- exit(1);
- }
- }
-
- print_src(instr->control.src1);
- printf(", [");
- print_src(instr->control.src2);
- printf(" + ");
- if (is_control_reg && instr->control.flags != 0x4)
- print_control_reg(instr->control.uimm);
- else
- printf("0x%03x", instr->control.uimm);
- printf("], 0x%x", instr->control.flags);
- break;
- }
- case OPC_BRNEI:
- case OPC_BREQI:
- case OPC_BRNEB:
- case OPC_BREQB: {
- unsigned off = i + instr->br.ioff;
-
- assert(!rep);
-
- /* Since $00 reads back zero, it can be used as src for
- * unconditional branches. (This only really makes sense
- * for the BREQB.. or possible BRNEI if imm==0.)
- *
- * If bit=0 then branch is taken if *all* bits are zero.
- * Otherwise it is taken if bit (bit-1) is clear.
- *
- * Note the instruction after a jump/branch is executed
- * regardless of whether branch is taken, so use nop or
- * take that into account in code.
- */
- if (instr->br.src || (opc != OPC_BRNEB)) {
- bool immed = false;
-
- if (opc == OPC_BRNEI) {
- printf("brne ");
- immed = true;
- } else if (opc == OPC_BREQI) {
- printf("breq ");
- immed = true;
- } else if (opc == OPC_BRNEB) {
- printf("brne ");
- } else if (opc == OPC_BREQB) {
- printf("breq ");
- }
- print_src(instr->br.src);
- if (immed) {
- printf(", 0x%x,", instr->br.bit_or_imm);
- } else {
- printf(", b%u,", instr->br.bit_or_imm);
- }
- } else {
- printf("jump");
- if (verbose && instr->br.bit_or_imm) {
- printerr(" (src=%03x, bit=%03x) ",
- instr->br.src, instr->br.bit_or_imm);
- }
- }
-
- printf(" #");
- printlbl("%s", label_name(off, true));
- if (verbose)
- printf(" (#%d, %04x)", instr->br.ioff, off);
- break;
- }
- case OPC_CALL:
- assert(!rep);
- printf("call #");
- printlbl("%s", fxn_name(instr->call.uoff));
- if (verbose) {
- printf(" (%04x)", instr->call.uoff);
- if (instr->br.bit_or_imm || instr->br.src) {
- printerr(" (src=%03x, bit=%03x) ",
- instr->br.src, instr->br.bit_or_imm);
- }
- }
- break;
- case OPC_RET:
- assert(!rep);
- if (instr->ret.pad)
- printf("[%08x] ; ", instrs[i]);
- if (instr->ret.interrupt)
- printf("iret");
- else
- printf("ret");
- break;
- case OPC_WIN:
- assert(!rep);
- if (instr->waitin.pad)
- printf("[%08x] ; ", instrs[i]);
- printf("waitin");
- if (verbose && instr->waitin.pad)
- printerr(" (pad=%x)", instr->waitin.pad);
- break;
- case OPC_PREEMPTLEAVE6:
- if (gpuver < 6) {
- printf("[%08x] ; op38", instrs[i]);
- } else {
- printf("preemptleave #");
- printlbl("%s", label_name(instr->call.uoff, true));
- }
- break;
- case OPC_SETSECURE:
- /* Note: This seems to implicitly read the secure/not-secure state
- * to set from the low bit of $02, and implicitly jumps to pc + 3
- * (i.e. skipping the next two instructions) if it succeeds. We
- * print these implicit parameters to make reading the disassembly
- * easier.
- */
- if (instr->pad)
- printf("[%08x] ; ", instrs[i]);
- printf("setsecure $02, #");
- printlbl("%s", label_name(i + 3, true));
- break;
- default:
- printerr("[%08x]", instrs[i]);
- printf(" ; op%02x ", opc);
- print_dst(instr->alui.dst);
- printf(", ");
- print_src(instr->alui.src);
- print_gpu_reg(instrs[i] & 0xffff);
- break;
- }
- printf("\n");
- }
-
- /* print jumptable: */
- if (verbose) {
- printf(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n");
- printf("; JUMP TABLE\n");
- for (i = 0; i < 0x7f; i++) {
- int n = i;// + CP_NOP;
- uint32_t offset = jmptbl[i];
- const char *name = getpm4(n);
- printf("%3d %02x: ", n, n);
- printf("%04x", offset);
- if (name) {
- printf(" ; %s", name);
- } else {
- printf(" ; UNKN%d", n);
- }
- printf("\n");
- }
- }
+ uint32_t *instrs = buf;
+ const int jmptbl_start = instrs[1] & 0xffff;
+ uint32_t *jmptbl = &buf[jmptbl_start];
+ afuc_opc opc;
+ bool rep;
+ int i;
+
+ /* parse jumptable: */
+ for (i = 0; i < 0x80; i++) {
+ unsigned offset = jmptbl[i];
+ unsigned n = i; // + CP_NOP;
+ add_jump_table_entry(n, offset);
+ }
+
+ /* do a pre-pass to find instructions that are potential branch targets,
+ * and add labels for them:
+ */
+ for (i = 0; i < jmptbl_start; i++) {
+ afuc_instr *instr = (void *)&instrs[i];
+
+ afuc_get_opc(instr, &opc, &rep);
+
+ switch (opc) {
+ case OPC_BRNEI:
+ case OPC_BREQI:
+ case OPC_BRNEB:
+ case OPC_BREQB:
+ label_idx(i + instr->br.ioff, true);
+ break;
+ case OPC_PREEMPTLEAVE6:
+ if (gpuver >= 6)
+ label_idx(instr->call.uoff, true);
+ break;
+ case OPC_CALL:
+ fxn_idx(instr->call.uoff, true);
+ break;
+ case OPC_SETSECURE:
+ /* this implicitly jumps to pc + 3 if successful */
+ label_idx(i + 3, true);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* print instructions: */
+ for (i = 0; i < jmptbl_start; i++) {
+ int jump_label_idx;
+ afuc_instr *instr = (void *)&instrs[i];
+ const char *fname, *lname;
+ afuc_opc opc;
+ bool rep;
+
+ afuc_get_opc(instr, &opc, &rep);
+
+ lname = label_name(i, false);
+ fname = fxn_name(i);
+ jump_label_idx = get_jump_table_entry(i);
+
+ if (jump_label_idx >= 0) {
+ int j;
+ printf("\n");
+ for (j = 0; j < jump_labels[jump_label_idx].num_jump_labels; j++) {
+ uint32_t jump_label = jump_labels[jump_label_idx].jump_labels[j];
+ const char *name = getpm4(jump_label);
+ if (name) {
+ printlbl("%s", name);
+ } else {
+ printlbl("UNKN%d", jump_label);
+ }
+ printf(":\n");
+ }
+ }
+
+ if (fname) {
+ printlbl("%s", fname);
+ printf(":\n");
+ }
+
+ if (lname) {
+ printlbl(" %s", lname);
+ printf(":");
+ } else {
+ printf(" ");
+ }
+
+ if (verbose) {
+ printf("\t%04x: %08x ", i, instrs[i]);
+ } else {
+ printf(" ");
+ }
+
+ switch (opc) {
+ case OPC_NOP: {
+ /* a6xx changed the default immediate, and apparently 0
+ * is illegal now.
+ */
+ const uint32_t nop = gpuver >= 6 ? 0x1000000 : 0x0;
+ if (instrs[i] != nop) {
+ printerr("[%08x]", instrs[i]);
+ printf(" ; ");
+ }
+ if (rep)
+ printf("(rep)");
+ printf("nop");
+ print_gpu_reg(instrs[i]);
+
+ break;
+ }
+ case OPC_ADD:
+ case OPC_ADDHI:
+ case OPC_SUB:
+ case OPC_SUBHI:
+ case OPC_AND:
+ case OPC_OR:
+ case OPC_XOR:
+ case OPC_NOT:
+ case OPC_SHL:
+ case OPC_USHR:
+ case OPC_ISHR:
+ case OPC_ROT:
+ case OPC_MUL8:
+ case OPC_MIN:
+ case OPC_MAX:
+ case OPC_CMP: {
+ bool src1 = true;
+
+ if (opc == OPC_NOT)
+ src1 = false;
+
+ if (rep)
+ printf("(rep)");
+
+ print_alu_name(opc, instrs[i]);
+ print_dst(instr->alui.dst);
+ printf(", ");
+ if (src1) {
+ print_src(instr->alui.src);
+ printf(", ");
+ }
+ printf("0x%04x", instr->alui.uimm);
+ print_gpu_reg(instr->alui.uimm);
+
+ /* print out unexpected bits: */
+ if (verbose) {
+ if (instr->alui.src && !src1)
+ printerr(" (src=%02x)", instr->alui.src);
+ }
+
+ break;
+ }
+ case OPC_MOVI: {
+ if (rep)
+ printf("(rep)");
+ printf("mov ");
+ print_dst(instr->movi.dst);
+ printf(", 0x%04x", instr->movi.uimm);
+ if (instr->movi.shift)
+ printf(" << %u", instr->movi.shift);
+
+ /* using mov w/ << 16 is popular way to construct a pkt7
+ * header to send (for ex, from PFP to ME), so check that
+ * case first
+ */
+ if ((instr->movi.shift == 16) &&
+ ((instr->movi.uimm & 0xff00) == 0x7000)) {
+ unsigned opc, p;
+
+ opc = instr->movi.uimm & 0x7f;
+ p = _odd_parity_bit(opc);
+
+ /* So, you'd think that checking the parity bit would be
+ * a good way to rule out false positives, but seems like
+ * ME doesn't really care.. at least it would filter out
+ * things that look like actual legit packets between
+ * PFP and ME..
+ */
+ if (1 || p == ((instr->movi.uimm >> 7) & 0x1)) {
+ const char *name = getpm4(opc);
+ printf("\t; ");
+ if (name)
+ printlbl("%s", name);
+ else
+ printlbl("UNKN%u", opc);
+ break;
+ }
+ }
+
+ print_gpu_reg(instr->movi.uimm << instr->movi.shift);
+
+ break;
+ }
+ case OPC_ALU: {
+ bool src1 = true;
+
+ if (instr->alu.alu == OPC_NOT || instr->alu.alu == OPC_MSB)
+ src1 = false;
+
+ if (instr->alu.pad)
+ printf("[%08x] ; ", instrs[i]);
+
+ if (rep)
+ printf("(rep)");
+ if (instr->alu.xmov)
+ printf("(xmov%d)", instr->alu.xmov);
+
+ /* special case mnemonics:
+ * reading $00 seems to always yield zero, and so:
+ * or $dst, $00, $src -> mov $dst, $src
+ * Maybe add one for negate too, ie.
+ * sub $dst, $00, $src ???
+ */
+ if ((instr->alu.alu == OPC_OR) && !instr->alu.src1) {
+ printf("mov ");
+ src1 = false;
+ } else {
+ print_alu_name(instr->alu.alu, instrs[i]);
+ }
+
+ print_dst(instr->alu.dst);
+ if (src1) {
+ printf(", ");
+ print_src(instr->alu.src1);
+ }
+ printf(", ");
+ print_src(instr->alu.src2);
+
+ /* print out unexpected bits: */
+ if (verbose) {
+ if (instr->alu.pad)
+ printerr(" (pad=%01x)", instr->alu.pad);
+ if (instr->alu.src1 && !src1)
+ printerr(" (src1=%02x)", instr->alu.src1);
+ }
+
+ /* xmov is a modifier that makes the processor execute up to 3
+ * extra mov's after the current instruction. Given an ALU
+ * instruction:
+ *
+ * (xmovN) alu $dst, $src1, $src2
+ *
+ * In all of the uses in the firmware blob, $dst and $src2 are one
+ * of the "special" registers $data, $addr, $addr2. I've observed
+ * that if $dst isn't "special" then it's replaced with $00
+ * instead of $data, but I haven't checked what happens if $src2
+ * isn't "special". Anyway, in the usual case, the HW produces a
+ * count M = min(N, $rem) and then does the following:
+ *
+ * M = 1:
+ * mov $data, $src2
+ *
+ * M = 2:
+ * mov $data, $src2
+ * mov $data, $src2
+ *
+ * M = 3:
+ * mov $data, $src2
+ * mov $dst, $src2 (special case for CP_CONTEXT_REG_BUNCH)
+ * mov $data, $src2
+ *
+ * It seems to be frequently used in combination with (rep) to
+ * provide a kind of hardware-based loop unrolling, and there's
+ * even a special case in the ISA to be able to do this with
+ * CP_CONTEXT_REG_BUNCH. However (rep) isn't required.
+ *
+ * This dumps the expected extra instructions, assuming that $rem
+ * isn't too small.
+ */
+ if (verbose && instr->alu.xmov) {
+ for (int i = 0; i < instr->alu.xmov; i++) {
+ printf("\n ; mov ");
+ if (instr->alu.dst < 0x1d)
+ printf("$00");
+ else if (instr->alu.xmov == 3 && i == 1)
+ print_dst(instr->alu.dst);
+ else
+ printf("$data");
+ printf(", ");
+ print_src(instr->alu.src2);
+ }
+ }
+
+ break;
+ }
+ case OPC_CWRITE6:
+ case OPC_CREAD6:
+ case OPC_STORE6:
+ case OPC_LOAD6: {
+ if (rep)
+ printf("(rep)");
+
+ bool is_control_reg = true;
+ if (gpuver >= 6) {
+ switch (opc) {
+ case OPC_CWRITE6:
+ printf("cwrite ");
+ break;
+ case OPC_CREAD6:
+ printf("cread ");
+ break;
+ case OPC_STORE6:
+ is_control_reg = false;
+ printf("store ");
+ break;
+ case OPC_LOAD6:
+ is_control_reg = false;
+ printf("load ");
+ break;
+ default:
+ assert(!"unreachable");
+ }
+ } else {
+ switch (opc) {
+ case OPC_CWRITE5:
+ printf("cwrite ");
+ break;
+ case OPC_CREAD5:
+ printf("cread ");
+ break;
+ default:
+ fprintf(stderr, "A6xx control opcode on A5xx?\n");
+ exit(1);
+ }
+ }
+
+ print_src(instr->control.src1);
+ printf(", [");
+ print_src(instr->control.src2);
+ printf(" + ");
+ if (is_control_reg && instr->control.flags != 0x4)
+ print_control_reg(instr->control.uimm);
+ else
+ printf("0x%03x", instr->control.uimm);
+ printf("], 0x%x", instr->control.flags);
+ break;
+ }
+ case OPC_BRNEI:
+ case OPC_BREQI:
+ case OPC_BRNEB:
+ case OPC_BREQB: {
+ unsigned off = i + instr->br.ioff;
+
+ assert(!rep);
+
+ /* Since $00 reads back zero, it can be used as src for
+ * unconditional branches. (This only really makes sense
+ * for the BREQB.. or possible BRNEI if imm==0.)
+ *
+ * If bit=0 then branch is taken if *all* bits are zero.
+ * Otherwise it is taken if bit (bit-1) is clear.
+ *
+ * Note the instruction after a jump/branch is executed
+ * regardless of whether branch is taken, so use nop or
+ * take that into account in code.
+ */
+ if (instr->br.src || (opc != OPC_BRNEB)) {
+ bool immed = false;
+
+ if (opc == OPC_BRNEI) {
+ printf("brne ");
+ immed = true;
+ } else if (opc == OPC_BREQI) {
+ printf("breq ");
+ immed = true;
+ } else if (opc == OPC_BRNEB) {
+ printf("brne ");
+ } else if (opc == OPC_BREQB) {
+ printf("breq ");
+ }
+ print_src(instr->br.src);
+ if (immed) {
+ printf(", 0x%x,", instr->br.bit_or_imm);
+ } else {
+ printf(", b%u,", instr->br.bit_or_imm);
+ }
+ } else {
+ printf("jump");
+ if (verbose && instr->br.bit_or_imm) {
+ printerr(" (src=%03x, bit=%03x) ", instr->br.src,
+ instr->br.bit_or_imm);
+ }
+ }
+
+ printf(" #");
+ printlbl("%s", label_name(off, true));
+ if (verbose)
+ printf(" (#%d, %04x)", instr->br.ioff, off);
+ break;
+ }
+ case OPC_CALL:
+ assert(!rep);
+ printf("call #");
+ printlbl("%s", fxn_name(instr->call.uoff));
+ if (verbose) {
+ printf(" (%04x)", instr->call.uoff);
+ if (instr->br.bit_or_imm || instr->br.src) {
+ printerr(" (src=%03x, bit=%03x) ", instr->br.src,
+ instr->br.bit_or_imm);
+ }
+ }
+ break;
+ case OPC_RET:
+ assert(!rep);
+ if (instr->ret.pad)
+ printf("[%08x] ; ", instrs[i]);
+ if (instr->ret.interrupt)
+ printf("iret");
+ else
+ printf("ret");
+ break;
+ case OPC_WIN:
+ assert(!rep);
+ if (instr->waitin.pad)
+ printf("[%08x] ; ", instrs[i]);
+ printf("waitin");
+ if (verbose && instr->waitin.pad)
+ printerr(" (pad=%x)", instr->waitin.pad);
+ break;
+ case OPC_PREEMPTLEAVE6:
+ if (gpuver < 6) {
+ printf("[%08x] ; op38", instrs[i]);
+ } else {
+ printf("preemptleave #");
+ printlbl("%s", label_name(instr->call.uoff, true));
+ }
+ break;
+ case OPC_SETSECURE:
+ /* Note: This seems to implicitly read the secure/not-secure state
+ * to set from the low bit of $02, and implicitly jumps to pc + 3
+ * (i.e. skipping the next two instructions) if it succeeds. We
+ * print these implicit parameters to make reading the disassembly
+ * easier.
+ */
+ if (instr->pad)
+ printf("[%08x] ; ", instrs[i]);
+ printf("setsecure $02, #");
+ printlbl("%s", label_name(i + 3, true));
+ break;
+ default:
+ printerr("[%08x]", instrs[i]);
+ printf(" ; op%02x ", opc);
+ print_dst(instr->alui.dst);
+ printf(", ");
+ print_src(instr->alui.src);
+ print_gpu_reg(instrs[i] & 0xffff);
+ break;
+ }
+ printf("\n");
+ }
+
+ /* print jumptable: */
+ if (verbose) {
+ printf(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n");
+ printf("; JUMP TABLE\n");
+ for (i = 0; i < 0x7f; i++) {
+ int n = i; // + CP_NOP;
+ uint32_t offset = jmptbl[i];
+ const char *name = getpm4(n);
+ printf("%3d %02x: ", n, n);
+ printf("%04x", offset);
+ if (name) {
+ printf(" ; %s", name);
+ } else {
+ printf(" ; UNKN%d", n);
+ }
+ printf("\n");
+ }
+ }
}
-static void usage(void)
+static void
+usage(void)
{
- fprintf(stderr, "Usage:\n"
- "\tdisasm [-g GPUVER] [-v] [-c] filename.asm\n"
- "\t\t-g - specify GPU version (5, etc)\n"
- "\t\t-c - use colors\n"
- "\t\t-v - verbose output\n"
- );
- exit(2);
+ fprintf(stderr, "Usage:\n"
+ "\tdisasm [-g GPUVER] [-v] [-c] filename.asm\n"
+ "\t\t-g - specify GPU version (5, etc)\n"
+ "\t\t-c - use colors\n"
+ "\t\t-v - verbose output\n");
+ exit(2);
}
-int main(int argc, char **argv)
+int
+main(int argc, char **argv)
{
- uint32_t *buf;
- char *file, *control_reg_name;
- bool colors = false;
- size_t sz;
- int c;
-
- /* Argument parsing: */
- while ((c = getopt (argc, argv, "g:vc")) != -1) {
- switch (c) {
- case 'g':
- gpuver = atoi(optarg);
- break;
- case 'v':
- verbose = true;
- break;
- case 'c':
- colors = true;
- break;
- default:
- usage();
- }
- }
-
- if (optind >= argc) {
- fprintf(stderr, "no file specified!\n");
- usage();
- }
-
- file = argv[optind];
-
- /* if gpu version not specified, infer from filename: */
- if (!gpuver) {
- if (strstr(file, "a5")) {
- gpuver = 5;
- } else if (strstr(file, "a6")) {
- gpuver = 6;
- }
- }
-
- switch (gpuver) {
- case 6:
- printf("; a6xx microcode\n");
- variant = "A6XX";
- control_reg_name = "A6XX_CONTROL_REG";
- break;
- case 5:
- printf("; a5xx microcode\n");
- variant = "A5XX";
- control_reg_name = "A5XX_CONTROL_REG";
- break;
- default:
- fprintf(stderr, "unknown GPU version!\n");
- usage();
- }
-
- rnn_init();
- db = rnn_newdb();
-
- ctx = rnndec_newcontext(db);
- ctx->colors = colors ? &envy_def_colors : &envy_null_colors;
-
- rnn_parsefile(db, "adreno.xml");
- rnn_prepdb(db);
- if (db->estatus)
- errx(db->estatus, "failed to parse register database");
- dom[0] = rnn_finddomain(db, variant);
- dom[1] = rnn_finddomain(db, "AXXX");
- control_regs = rnn_finddomain(db, control_reg_name);
-
- rnndec_varadd(ctx, "chip", variant);
-
- buf = (uint32_t *)os_read_file(file, &sz);
-
- printf("; Disassembling microcode: %s\n", file);
- printf("; Version: %08x\n\n", buf[1]);
- disasm(&buf[1], sz/4 - 1);
-
- return 0;
+ uint32_t *buf;
+ char *file, *control_reg_name;
+ bool colors = false;
+ size_t sz;
+ int c;
+
+ /* Argument parsing: */
+ while ((c = getopt(argc, argv, "g:vc")) != -1) {
+ switch (c) {
+ case 'g':
+ gpuver = atoi(optarg);
+ break;
+ case 'v':
+ verbose = true;
+ break;
+ case 'c':
+ colors = true;
+ break;
+ default:
+ usage();
+ }
+ }
+
+ if (optind >= argc) {
+ fprintf(stderr, "no file specified!\n");
+ usage();
+ }
+
+ file = argv[optind];
+
+ /* if gpu version not specified, infer from filename: */
+ if (!gpuver) {
+ if (strstr(file, "a5")) {
+ gpuver = 5;
+ } else if (strstr(file, "a6")) {
+ gpuver = 6;
+ }
+ }
+
+ switch (gpuver) {
+ case 6:
+ printf("; a6xx microcode\n");
+ variant = "A6XX";
+ control_reg_name = "A6XX_CONTROL_REG";
+ break;
+ case 5:
+ printf("; a5xx microcode\n");
+ variant = "A5XX";
+ control_reg_name = "A5XX_CONTROL_REG";
+ break;
+ default:
+ fprintf(stderr, "unknown GPU version!\n");
+ usage();
+ }
+
+ rnn_init();
+ db = rnn_newdb();
+
+ ctx = rnndec_newcontext(db);
+ ctx->colors = colors ? &envy_def_colors : &envy_null_colors;
+
+ rnn_parsefile(db, "adreno.xml");
+ rnn_prepdb(db);
+ if (db->estatus)
+ errx(db->estatus, "failed to parse register database");
+ dom[0] = rnn_finddomain(db, variant);
+ dom[1] = rnn_finddomain(db, "AXXX");
+ control_regs = rnn_finddomain(db, control_reg_name);
+
+ rnndec_varadd(ctx, "chip", variant);
+
+ buf = (uint32_t *)os_read_file(file, &sz);
+
+ printf("; Disassembling microcode: %s\n", file);
+ printf("; Version: %08x\n\n", buf[1]);
+ disasm(&buf[1], sz / 4 - 1);
+
+ return 0;
}