From: Eric Anholt
Date: Fri, 20 Jul 2018 19:19:36 +0000 (-0700)
Subject: v3d: Add QPU pack/unpack for the new SFU instructions.
X-Git-Tag: upstream/19.0.0~3652
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=58c1d3860fefc16878670f1d25dc8187a81cb01b;p=platform%2Fupstream%2Fmesa.git

v3d: Add QPU pack/unpack for the new SFU instructions.

These instructions allow writing the result to any register, instead of a special writeback to r4.
---

diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
index 9e051e6..deaa533 100644
--- a/src/broadcom/qpu/qpu_instr.c
+++ b/src/broadcom/qpu/qpu_instr.c
@@ -107,6 +107,7 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
  [V3D_QPU_A_FLAPUSH] = "flapush",
  [V3D_QPU_A_FLBPUSH] = "flbpush",
  [V3D_QPU_A_FLPOP] = "flpop",
+ [V3D_QPU_A_RECIP] = "recip",
  [V3D_QPU_A_SETMSF] = "setmsf",
  [V3D_QPU_A_SETREVF] = "setrevf",
  [V3D_QPU_A_NOP] = "nop",
@@ -135,6 +136,11 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
  [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
  [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
  [V3D_QPU_A_LDVPMP] = "ldvpmp",
+ [V3D_QPU_A_RSQRT] = "rsqrt",
+ [V3D_QPU_A_EXP] = "exp",
+ [V3D_QPU_A_LOG] = "log",
+ [V3D_QPU_A_SIN] = "sin",
+ [V3D_QPU_A_RSQRT2] = "rsqrt2",
  [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
  [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
  [V3D_QPU_A_FCMP] = "fcmp",
@@ -369,6 +375,7 @@ static const uint8_t add_op_args[] = {
  [V3D_QPU_A_FLAPUSH] = D | A,
  [V3D_QPU_A_FLBPUSH] = D | A,
  [V3D_QPU_A_FLPOP] = D | A,
+ [V3D_QPU_A_RECIP] = D | A,
  [V3D_QPU_A_SETMSF] = D | A,
  [V3D_QPU_A_SETREVF] = D | A,
  [V3D_QPU_A_NOP] = 0,
@@ -401,6 +408,11 @@ static const uint8_t add_op_args[] = {
  [V3D_QPU_A_LDVPMD_IN] = D | A,
  [V3D_QPU_A_LDVPMD_OUT] = D | A,
  [V3D_QPU_A_LDVPMP] = D | A,
+ [V3D_QPU_A_RSQRT] = D | A,
+ [V3D_QPU_A_EXP] = D | A,
+ [V3D_QPU_A_LOG] = D | A,
+ [V3D_QPU_A_SIN] = D | A,
+ [V3D_QPU_A_RSQRT2] = D | A,
  [V3D_QPU_A_LDVPMG_IN] = D | A | B,
  [V3D_QPU_A_LDVPMG_OUT] = D | A | B,
 
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
index d99bb9d..09dbf3e 100644
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -166,6 +166,7 @@ enum v3d_qpu_add_op {
  V3D_QPU_A_FLAPUSH,
  V3D_QPU_A_FLBPUSH,
  V3D_QPU_A_FLPOP,
+ V3D_QPU_A_RECIP,
  V3D_QPU_A_SETMSF,
  V3D_QPU_A_SETREVF,
  V3D_QPU_A_NOP,
@@ -194,6 +195,11 @@ enum v3d_qpu_add_op {
  V3D_QPU_A_LDVPMD_IN,
  V3D_QPU_A_LDVPMD_OUT,
  V3D_QPU_A_LDVPMP,
+ V3D_QPU_A_RSQRT,
+ V3D_QPU_A_EXP,
+ V3D_QPU_A_LOG,
+ V3D_QPU_A_SIN,
+ V3D_QPU_A_RSQRT2,
  V3D_QPU_A_LDVPMG_IN,
  V3D_QPU_A_LDVPMG_OUT,
  V3D_QPU_A_FCMP,
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
index d2e878d..70f31d7 100644
--- a/src/broadcom/qpu/qpu_pack.c
+++ b/src/broadcom/qpu/qpu_pack.c
@@ -493,6 +493,7 @@ static const struct opcode_desc add_ops[] = {
  { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
  { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
  { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
+ { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
  { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
  { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
  { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
@@ -522,6 +523,11 @@ static const struct opcode_desc add_ops[] = {
  { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
  { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
  { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
+ { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
+ { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
+ { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
+ { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
+ { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
  { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
 
  /* FIXME: MORE COMPLICATED */
diff --git a/src/broadcom/qpu/tests/qpu_disasm.c b/src/broadcom/qpu/tests/qpu_disasm.c
index 7237912..2e8d980 100644
--- a/src/broadcom/qpu/tests/qpu_disasm.c
+++ b/src/broadcom/qpu/tests/qpu_disasm.c
@@ -84,6 +84,14 @@ static const struct {
  { 41, 0x3de02040f8ff7201ull, "stvpmv 1, rf8 ; mov r1, 1" },
  { 41, 0xd8000e50bb2d3000ull, "sampid rf16 ; fmul rf57.h, r3, r1.l" },
 
+ /* v4.1 SFU instructions. */
+ { 41, 0xe98d60c1ba2aef80ull, "recip rf1, rf62 ; fmul r3.h, r2.l, r1.l; ldunifrf.rf53" },
+ { 41, 0x7d87c2debc51c000ull, "rsqrt rf30, r4 ; fmul rf11, r4.h, r2.h; ldunifrf.rf31" },
+ { 41, 0xb182475abc2bb000ull, "rsqrt2 rf26, r3 ; fmul rf29.l, r2.h, r1.abs; ldunifrf.rf9" },
+ { 41, 0x79880808bc0b6900ull, "sin rf8, rf36 ; fmul rf32, r2.h, r0.l; ldunifrf.rf32" },
+ { 41, 0x04092094bc5a28c0ull, "exp.ifb rf20, r2 ; add r2, rf35, r2" },
+ { 41, 0xe00648bfbc32a000ull, "log rf63, r2 ; fmul.andnn rf34.h, r4.l, r1.abs" },
+
  /* v4.2 changes */
  { 42, 0x3c203192bb814000ull, "barrierid syncb ; nop ; thrsw" },
 };