From 8c79c710d4e1f3e424d5abf1f9abccdfc9a59caa Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Mon, 2 Mar 2020 07:57:51 -0500
Subject: [PATCH] pan/bi: Identify extended FMA opcodes

When the top 3 bits of the opcode are 111, it leads to a special
extended opcode mode instead.

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
Notes (below the cut line, not part of the commit message):
 - Extended table entries are flagged through the new `extended` field
   and store the opcode with the 0xe0000 prefix cleared;
   find_fma_op_info() clears the same bits before comparing.
 - IMAD is listed as 0x07800, i.e. the previous 0xe7800 with the prefix
   cleared, like every other extended entry. This also keeps the table
   in ascending opcode order.
 - Indentation in this copy was reconstructed (8 spaces per level
   assumed). If context lines fail to match, apply with
   `git am --ignore-whitespace`.

 src/panfrost/bifrost/disassemble.c | 246 +++++++++++++++++++------------------
 1 file changed, 127 insertions(+), 119 deletions(-)

diff --git a/src/panfrost/bifrost/disassemble.c b/src/panfrost/bifrost/disassemble.c
index 01445ad..0870719 100644
--- a/src/panfrost/bifrost/disassemble.c
+++ b/src/panfrost/bifrost/disassemble.c
@@ -109,6 +109,7 @@ enum fma_src_type {
 };
 
 struct fma_op_info {
+        bool extended;
         unsigned op;
         char name[30];
         enum fma_src_type src_type;
@@ -575,129 +576,136 @@ csel_cond_name(enum bifrost_csel_cond cond)
 }
 
 static const struct fma_op_info FMAOpInfos[] = {
-        { 0x00000, "FMA.f32", FMA_FMA },
-        { 0x40000, "MAX.f32", FMA_FMINMAX },
-        { 0x44000, "MIN.f32", FMA_FMINMAX },
-        { 0x48000, "FCMP.GL", FMA_FCMP },
-        { 0x4c000, "FCMP.D3D", FMA_FCMP },
-        { 0x4ff98, "ADD.i32", FMA_TWO_SRC },
-        { 0x4ffd8, "SUB.i32", FMA_TWO_SRC },
-        { 0x4fff0, "SUBB.i32", FMA_TWO_SRC },
-        { 0x50000, "FMA_MSCALE", FMA_FMA_MSCALE },
-        { 0x58000, "ADD.f32", FMA_FADD },
-        { 0x5c000, "CSEL4", FMA_CSEL4 },
-        { 0x5d8d0, "ICMP.D3D.GT.v2i16", FMA_TWO_SRC },
-        { 0x5d9d0, "UCMP.D3D.GT.v2i16", FMA_TWO_SRC },
-        { 0x5dad0, "ICMP.D3D.GE.v2i16", FMA_TWO_SRC },
-        { 0x5dbd0, "UCMP.D3D.GE.v2i16", FMA_TWO_SRC },
-        { 0x5dcd0, "ICMP.D3D.EQ.v2i16", FMA_TWO_SRC },
-        { 0x5de40, "ICMP.GL.GT.i32", FMA_TWO_SRC }, // src0 > src1 ? 1 : 0
-        { 0x5de48, "ICMP.GL.GE.i32", FMA_TWO_SRC },
-        { 0x5de50, "UCMP.GL.GT.i32", FMA_TWO_SRC },
-        { 0x5de58, "UCMP.GL.GE.i32", FMA_TWO_SRC },
-        { 0x5de60, "ICMP.GL.EQ.i32", FMA_TWO_SRC },
-        { 0x5dec0, "ICMP.D3D.GT.i32", FMA_TWO_SRC }, // src0 > src1 ? ~0 : 0
-        { 0x5dec8, "ICMP.D3D.GE.i32", FMA_TWO_SRC },
-        { 0x5ded0, "UCMP.D3D.GT.i32", FMA_TWO_SRC },
-        { 0x5ded8, "UCMP.D3D.GE.i32", FMA_TWO_SRC },
-        { 0x5dee0, "ICMP.D3D.EQ.i32", FMA_TWO_SRC },
-        { 0x60000, "RSHIFT_NAND", FMA_SHIFT },
-        { 0x61000, "RSHIFT_AND", FMA_SHIFT },
-        { 0x62000, "LSHIFT_NAND", FMA_SHIFT },
-        { 0x63000, "LSHIFT_AND", FMA_SHIFT }, // (src0 << src2) & src1
-        { 0x64000, "RSHIFT_XOR", FMA_SHIFT },
-        { 0x65200, "LSHIFT_ADD.i32", FMA_THREE_SRC },
-        { 0x65600, "LSHIFT_SUB.i32", FMA_THREE_SRC }, // (src0 << src2) - src1
-        { 0x65a00, "LSHIFT_RSUB.i32", FMA_THREE_SRC }, // src1 - (src0 << src2)
-        { 0x65e00, "RSHIFT_ADD.i32", FMA_THREE_SRC },
-        { 0x66200, "RSHIFT_SUB.i32", FMA_THREE_SRC },
-        { 0x66600, "RSHIFT_RSUB.i32", FMA_THREE_SRC },
-        { 0x66a00, "ARSHIFT_ADD.i32", FMA_THREE_SRC },
-        { 0x66e00, "ARSHIFT_SUB.i32", FMA_THREE_SRC },
-        { 0x67200, "ARSHIFT_RSUB.i32", FMA_THREE_SRC },
-        { 0x80000, "FMA.v2f16", FMA_FMA16 },
-        { 0xc0000, "MAX.v2f16", FMA_FMINMAX16 },
-        { 0xc4000, "MIN.v2f16", FMA_FMINMAX16 },
-        { 0xc8000, "FCMP.GL", FMA_FCMP16 },
-        { 0xcc000, "FCMP.D3D", FMA_FCMP16 },
-        { 0xcf900, "ADD.v2i16", FMA_TWO_SRC },
-        { 0xcfc10, "ADDC.i32", FMA_TWO_SRC },
-        { 0xcfd80, "ADD.i32.i16.X", FMA_TWO_SRC },
-        { 0xcfd90, "ADD.i32.u16.X", FMA_TWO_SRC },
-        { 0xcfdc0, "ADD.i32.i16.Y", FMA_TWO_SRC },
-        { 0xcfdd0, "ADD.i32.u16.Y", FMA_TWO_SRC },
-        { 0xd8000, "ADD.v2f16", FMA_FADD16 },
-        { 0xdc000, "CSEL4.v16", FMA_CSEL4 },
-        { 0xdd000, "F32_TO_F16", FMA_TWO_SRC },
-        { 0xe0046, "F16_TO_I16.XX", FMA_ONE_SRC },
-        { 0xe0047, "F16_TO_U16.XX", FMA_ONE_SRC },
-        { 0xe004e, "F16_TO_I16.YX", FMA_ONE_SRC },
-        { 0xe004f, "F16_TO_U16.YX", FMA_ONE_SRC },
-        { 0xe0056, "F16_TO_I16.XY", FMA_ONE_SRC },
-        { 0xe0057, "F16_TO_U16.XY", FMA_ONE_SRC },
-        { 0xe005e, "F16_TO_I16.YY", FMA_ONE_SRC },
-        { 0xe005f, "F16_TO_U16.YY", FMA_ONE_SRC },
-        { 0xe00c0, "I16_TO_F16.XX", FMA_ONE_SRC },
-        { 0xe00c1, "U16_TO_F16.XX", FMA_ONE_SRC },
-        { 0xe00c8, "I16_TO_F16.YX", FMA_ONE_SRC },
-        { 0xe00c9, "U16_TO_F16.YX", FMA_ONE_SRC },
-        { 0xe00d0, "I16_TO_F16.XY", FMA_ONE_SRC },
-        { 0xe00d1, "U16_TO_F16.XY", FMA_ONE_SRC },
-        { 0xe00d8, "I16_TO_F16.YY", FMA_ONE_SRC },
-        { 0xe00d9, "U16_TO_F16.YY", FMA_ONE_SRC },
-        { 0xe0136, "F32_TO_I32", FMA_ONE_SRC },
-        { 0xe0137, "F32_TO_U32", FMA_ONE_SRC },
-        { 0xe0178, "I32_TO_F32", FMA_ONE_SRC },
-        { 0xe0179, "U32_TO_F32", FMA_ONE_SRC },
-        { 0xe0198, "I16_TO_I32.X", FMA_ONE_SRC },
-        { 0xe0199, "U16_TO_U32.X", FMA_ONE_SRC },
-        { 0xe019a, "I16_TO_I32.Y", FMA_ONE_SRC },
-        { 0xe019b, "U16_TO_U32.Y", FMA_ONE_SRC },
-        { 0xe019c, "I16_TO_F32.X", FMA_ONE_SRC },
-        { 0xe019d, "U16_TO_F32.X", FMA_ONE_SRC },
-        { 0xe019e, "I16_TO_F32.Y", FMA_ONE_SRC },
-        { 0xe019f, "U16_TO_F32.Y", FMA_ONE_SRC },
-        { 0xe01a2, "F16_TO_F32.X", FMA_ONE_SRC },
-        { 0xe01a3, "F16_TO_F32.Y", FMA_ONE_SRC },
-        { 0xe032c, "NOP", FMA_ONE_SRC },
-        { 0xe032d, "MOV", FMA_ONE_SRC },
-        { 0xe032f, "SWZ.YY.v2i16", FMA_ONE_SRC },
-        { 0xe0345, "LOG_FREXPM", FMA_ONE_SRC },
-        { 0xe0365, "FRCP_FREXPM", FMA_ONE_SRC },
-        { 0xe0375, "FSQRT_FREXPM", FMA_ONE_SRC },
-        { 0xe038d, "FRCP_FREXPE", FMA_ONE_SRC },
-        { 0xe03a5, "FSQRT_FREXPE", FMA_ONE_SRC },
-        { 0xe03ad, "FRSQ_FREXPE", FMA_ONE_SRC },
-        { 0xe03c5, "LOG_FREXPE", FMA_ONE_SRC },
-        { 0xe03fa, "CLZ", FMA_ONE_SRC },
-        { 0xe0b80, "IMAX3", FMA_THREE_SRC },
-        { 0xe0bc0, "UMAX3", FMA_THREE_SRC },
-        { 0xe0c00, "IMIN3", FMA_THREE_SRC },
-        { 0xe0c40, "UMIN3", FMA_THREE_SRC },
-        { 0xe0ec5, "ROUND", FMA_ONE_SRC },
-        { 0xe0f40, "CSEL", FMA_THREE_SRC }, // src2 != 0 ? src1 : src0
-        { 0xe0fc0, "MUX.i32", FMA_THREE_SRC }, // see ADD comment
-        { 0xe1805, "ROUNDEVEN", FMA_ONE_SRC },
-        { 0xe1845, "CEIL", FMA_ONE_SRC },
-        { 0xe1885, "FLOOR", FMA_ONE_SRC },
-        { 0xe18c5, "TRUNC", FMA_ONE_SRC },
-        { 0xe19b0, "ATAN_LDEXP.Y.f32", FMA_TWO_SRC },
-        { 0xe19b8, "ATAN_LDEXP.X.f32", FMA_TWO_SRC },
-        { 0xe1c80, "LSHIFT_ADD_LOW32.u32", FMA_SHIFT_ADD64 },
-        { 0xe1cc0, "LSHIFT_ADD_LOW32.i64", FMA_SHIFT_ADD64 },
-        { 0xe1d80, "LSHIFT_ADD_LOW32.i32", FMA_SHIFT_ADD64 },
-        { 0xe1e00, "SEL.XX.i16", FMA_TWO_SRC },
-        { 0xe1e08, "SEL.YX.i16", FMA_TWO_SRC },
-        { 0xe1e10, "SEL.XY.i16", FMA_TWO_SRC },
-        { 0xe1e18, "SEL.YY.i16", FMA_TWO_SRC },
-        { 0xe7800, "IMAD", FMA_THREE_SRC },
-        { 0xe78db, "POPCNT", FMA_ONE_SRC },
+        { false, 0x00000, "FMA.f32", FMA_FMA },
+        { false, 0x40000, "MAX.f32", FMA_FMINMAX },
+        { false, 0x44000, "MIN.f32", FMA_FMINMAX },
+        { false, 0x48000, "FCMP.GL", FMA_FCMP },
+        { false, 0x4c000, "FCMP.D3D", FMA_FCMP },
+        { false, 0x4ff98, "ADD.i32", FMA_TWO_SRC },
+        { false, 0x4ffd8, "SUB.i32", FMA_TWO_SRC },
+        { false, 0x4fff0, "SUBB.i32", FMA_TWO_SRC },
+        { false, 0x50000, "FMA_MSCALE", FMA_FMA_MSCALE },
+        { false, 0x58000, "ADD.f32", FMA_FADD },
+        { false, 0x5c000, "CSEL4", FMA_CSEL4 },
+        { false, 0x5d8d0, "ICMP.D3D.GT.v2i16", FMA_TWO_SRC },
+        { false, 0x5d9d0, "UCMP.D3D.GT.v2i16", FMA_TWO_SRC },
+        { false, 0x5dad0, "ICMP.D3D.GE.v2i16", FMA_TWO_SRC },
+        { false, 0x5dbd0, "UCMP.D3D.GE.v2i16", FMA_TWO_SRC },
+        { false, 0x5dcd0, "ICMP.D3D.EQ.v2i16", FMA_TWO_SRC },
+        { false, 0x5de40, "ICMP.GL.GT.i32", FMA_TWO_SRC }, // src0 > src1 ? 1 : 0
+        { false, 0x5de48, "ICMP.GL.GE.i32", FMA_TWO_SRC },
+        { false, 0x5de50, "UCMP.GL.GT.i32", FMA_TWO_SRC },
+        { false, 0x5de58, "UCMP.GL.GE.i32", FMA_TWO_SRC },
+        { false, 0x5de60, "ICMP.GL.EQ.i32", FMA_TWO_SRC },
+        { false, 0x5dec0, "ICMP.D3D.GT.i32", FMA_TWO_SRC }, // src0 > src1 ? ~0 : 0
+        { false, 0x5dec8, "ICMP.D3D.GE.i32", FMA_TWO_SRC },
+        { false, 0x5ded0, "UCMP.D3D.GT.i32", FMA_TWO_SRC },
+        { false, 0x5ded8, "UCMP.D3D.GE.i32", FMA_TWO_SRC },
+        { false, 0x5dee0, "ICMP.D3D.EQ.i32", FMA_TWO_SRC },
+        { false, 0x60000, "RSHIFT_NAND", FMA_SHIFT },
+        { false, 0x61000, "RSHIFT_AND", FMA_SHIFT },
+        { false, 0x62000, "LSHIFT_NAND", FMA_SHIFT },
+        { false, 0x63000, "LSHIFT_AND", FMA_SHIFT }, // (src0 << src2) & src1
+        { false, 0x64000, "RSHIFT_XOR", FMA_SHIFT },
+        { false, 0x65200, "LSHIFT_ADD.i32", FMA_THREE_SRC },
+        { false, 0x65600, "LSHIFT_SUB.i32", FMA_THREE_SRC }, // (src0 << src2) - src1
+        { false, 0x65a00, "LSHIFT_RSUB.i32", FMA_THREE_SRC }, // src1 - (src0 << src2)
+        { false, 0x65e00, "RSHIFT_ADD.i32", FMA_THREE_SRC },
+        { false, 0x66200, "RSHIFT_SUB.i32", FMA_THREE_SRC },
+        { false, 0x66600, "RSHIFT_RSUB.i32", FMA_THREE_SRC },
+        { false, 0x66a00, "ARSHIFT_ADD.i32", FMA_THREE_SRC },
+        { false, 0x66e00, "ARSHIFT_SUB.i32", FMA_THREE_SRC },
+        { false, 0x67200, "ARSHIFT_RSUB.i32", FMA_THREE_SRC },
+        { false, 0x80000, "FMA.v2f16", FMA_FMA16 },
+        { false, 0xc0000, "MAX.v2f16", FMA_FMINMAX16 },
+        { false, 0xc4000, "MIN.v2f16", FMA_FMINMAX16 },
+        { false, 0xc8000, "FCMP.GL", FMA_FCMP16 },
+        { false, 0xcc000, "FCMP.D3D", FMA_FCMP16 },
+        { false, 0xcf900, "ADD.v2i16", FMA_TWO_SRC },
+        { false, 0xcfc10, "ADDC.i32", FMA_TWO_SRC },
+        { false, 0xcfd80, "ADD.i32.i16.X", FMA_TWO_SRC },
+        { false, 0xcfd90, "ADD.i32.u16.X", FMA_TWO_SRC },
+        { false, 0xcfdc0, "ADD.i32.i16.Y", FMA_TWO_SRC },
+        { false, 0xcfdd0, "ADD.i32.u16.Y", FMA_TWO_SRC },
+        { false, 0xd8000, "ADD.v2f16", FMA_FADD16 },
+        { false, 0xdc000, "CSEL4.v16", FMA_CSEL4 },
+        { false, 0xdd000, "F32_TO_F16", FMA_TWO_SRC },
+        { true, 0x00046, "F16_TO_I16.XX", FMA_ONE_SRC },
+        { true, 0x00047, "F16_TO_U16.XX", FMA_ONE_SRC },
+        { true, 0x0004e, "F16_TO_I16.YX", FMA_ONE_SRC },
+        { true, 0x0004f, "F16_TO_U16.YX", FMA_ONE_SRC },
+        { true, 0x00056, "F16_TO_I16.XY", FMA_ONE_SRC },
+        { true, 0x00057, "F16_TO_U16.XY", FMA_ONE_SRC },
+        { true, 0x0005e, "F16_TO_I16.YY", FMA_ONE_SRC },
+        { true, 0x0005f, "F16_TO_U16.YY", FMA_ONE_SRC },
+        { true, 0x000c0, "I16_TO_F16.XX", FMA_ONE_SRC },
+        { true, 0x000c1, "U16_TO_F16.XX", FMA_ONE_SRC },
+        { true, 0x000c8, "I16_TO_F16.YX", FMA_ONE_SRC },
+        { true, 0x000c9, "U16_TO_F16.YX", FMA_ONE_SRC },
+        { true, 0x000d0, "I16_TO_F16.XY", FMA_ONE_SRC },
+        { true, 0x000d1, "U16_TO_F16.XY", FMA_ONE_SRC },
+        { true, 0x000d8, "I16_TO_F16.YY", FMA_ONE_SRC },
+        { true, 0x000d9, "U16_TO_F16.YY", FMA_ONE_SRC },
+        { true, 0x00136, "F32_TO_I32", FMA_ONE_SRC },
+        { true, 0x00137, "F32_TO_U32", FMA_ONE_SRC },
+        { true, 0x00178, "I32_TO_F32", FMA_ONE_SRC },
+        { true, 0x00179, "U32_TO_F32", FMA_ONE_SRC },
+        { true, 0x00198, "I16_TO_I32.X", FMA_ONE_SRC },
+        { true, 0x00199, "U16_TO_U32.X", FMA_ONE_SRC },
+        { true, 0x0019a, "I16_TO_I32.Y", FMA_ONE_SRC },
+        { true, 0x0019b, "U16_TO_U32.Y", FMA_ONE_SRC },
+        { true, 0x0019c, "I16_TO_F32.X", FMA_ONE_SRC },
+        { true, 0x0019d, "U16_TO_F32.X", FMA_ONE_SRC },
+        { true, 0x0019e, "I16_TO_F32.Y", FMA_ONE_SRC },
+        { true, 0x0019f, "U16_TO_F32.Y", FMA_ONE_SRC },
+        { true, 0x001a2, "F16_TO_F32.X", FMA_ONE_SRC },
+        { true, 0x001a3, "F16_TO_F32.Y", FMA_ONE_SRC },
+        { true, 0x0032c, "NOP", FMA_ONE_SRC },
+        { true, 0x0032d, "MOV", FMA_ONE_SRC },
+        { true, 0x0032f, "SWZ.YY.v2i16", FMA_ONE_SRC },
+        { true, 0x00345, "LOG_FREXPM", FMA_ONE_SRC },
+        { true, 0x00365, "FRCP_FREXPM", FMA_ONE_SRC },
+        { true, 0x00375, "FSQRT_FREXPM", FMA_ONE_SRC },
+        { true, 0x0038d, "FRCP_FREXPE", FMA_ONE_SRC },
+        { true, 0x003a5, "FSQRT_FREXPE", FMA_ONE_SRC },
+        { true, 0x003ad, "FRSQ_FREXPE", FMA_ONE_SRC },
+        { true, 0x003c5, "LOG_FREXPE", FMA_ONE_SRC },
+        { true, 0x003fa, "CLZ", FMA_ONE_SRC },
+        { true, 0x00b80, "IMAX3", FMA_THREE_SRC },
+        { true, 0x00bc0, "UMAX3", FMA_THREE_SRC },
+        { true, 0x00c00, "IMIN3", FMA_THREE_SRC },
+        { true, 0x00c40, "UMIN3", FMA_THREE_SRC },
+        { true, 0x00ec5, "ROUND", FMA_ONE_SRC },
+        { true, 0x00f40, "CSEL", FMA_THREE_SRC }, // src2 != 0 ? src1 : src0
+        { true, 0x00fc0, "MUX.i32", FMA_THREE_SRC }, // see ADD comment
+        { true, 0x01805, "ROUNDEVEN", FMA_ONE_SRC },
+        { true, 0x01845, "CEIL", FMA_ONE_SRC },
+        { true, 0x01885, "FLOOR", FMA_ONE_SRC },
+        { true, 0x018c5, "TRUNC", FMA_ONE_SRC },
+        { true, 0x019b0, "ATAN_LDEXP.Y.f32", FMA_TWO_SRC },
+        { true, 0x019b8, "ATAN_LDEXP.X.f32", FMA_TWO_SRC },
+        { true, 0x01c80, "LSHIFT_ADD_LOW32.u32", FMA_SHIFT_ADD64 },
+        { true, 0x01cc0, "LSHIFT_ADD_LOW32.i64", FMA_SHIFT_ADD64 },
+        { true, 0x01d80, "LSHIFT_ADD_LOW32.i32", FMA_SHIFT_ADD64 },
+        { true, 0x01e00, "SEL.XX.i16", FMA_TWO_SRC },
+        { true, 0x01e08, "SEL.YX.i16", FMA_TWO_SRC },
+        { true, 0x01e10, "SEL.XY.i16", FMA_TWO_SRC },
+        { true, 0x01e18, "SEL.YY.i16", FMA_TWO_SRC },
+        { true, 0x07800, "IMAD", FMA_THREE_SRC },
+        { true, 0x078db, "POPCNT", FMA_ONE_SRC },
 };
 
-static struct fma_op_info find_fma_op_info(unsigned op)
+static struct fma_op_info find_fma_op_info(unsigned op, bool extended)
 {
         for (unsigned i = 0; i < ARRAY_SIZE(FMAOpInfos); i++) {
                 unsigned opCmp = ~0;
+
+                if (FMAOpInfos[i].extended != extended)
+                        continue;
+
+                if (extended)
+                        op &= ~0xe0000;
+
                 switch (FMAOpInfos[i].src_type) {
                 case FMA_ONE_SRC:
                         opCmp = op;
@@ -830,7 +838,7 @@ static void dump_fma(FILE *fp, uint64_t word, struct bifrost_regs regs, struct b
         }
         struct bifrost_fma_inst FMA;
         memcpy((char *) &FMA, (char *) &word, sizeof(struct bifrost_fma_inst));
-        struct fma_op_info info = find_fma_op_info(FMA.op);
+        struct fma_op_info info = find_fma_op_info(FMA.op, (FMA.op & 0xe0000) == 0xe0000);
 
         fprintf(fp, "%s", info.name);
         if (info.src_type == FMA_FADD ||
-- 
2.7.4