const char *name;
} opcs[1 << (3+NOPC_BITS)] = {
#define OPC(cat, opc, name) [(opc)] = { #name }
- /* category 0: */
- OPC(0, OPC_NOP, nop),
- OPC(0, OPC_B, b),
- OPC(0, OPC_JUMP, jump),
- OPC(0, OPC_CALL, call),
- OPC(0, OPC_RET, ret),
- OPC(0, OPC_KILL, kill),
- OPC(0, OPC_DEMOTE, demote),
- OPC(0, OPC_END, end),
- OPC(0, OPC_EMIT, emit),
- OPC(0, OPC_CUT, cut),
- OPC(0, OPC_CHMASK, chmask),
- OPC(0, OPC_CHSH, chsh),
- OPC(0, OPC_FLOW_REV, flow_rev),
- OPC(0, OPC_PREDT, predt),
- OPC(0, OPC_PREDF, predf),
- OPC(0, OPC_PREDE, prede),
- OPC(0, OPC_BKT, bkt),
- OPC(0, OPC_STKS, stks),
- OPC(0, OPC_STKR, stkr),
- OPC(0, OPC_XSET, xset),
- OPC(0, OPC_XCLR, xclr),
- OPC(0, OPC_GETONE, getone),
- OPC(0, OPC_DBG, dbg),
- OPC(0, OPC_SHPS, shps),
- OPC(0, OPC_SHPE, shpe),
-
- /* category 1: */
- OPC(1, OPC_MOV, ),
- OPC(1, OPC_MOVMSK, movmsk),
- OPC(1, OPC_SWZ, swz),
- OPC(1, OPC_SCT, sct),
- OPC(1, OPC_GAT, gat),
- OPC(1, OPC_BALLOT_MACRO, ballot.macro),
- OPC(1, OPC_ANY_MACRO, any.macro),
- OPC(1, OPC_ALL_MACRO, all.macro),
- OPC(1, OPC_ELECT_MACRO, elect.macro),
- OPC(1, OPC_READ_COND_MACRO, read_cond.macro),
- OPC(1, OPC_READ_FIRST_MACRO, read_first.macro),
- OPC(1, OPC_SWZ_SHARED_MACRO, swz_shared.macro),
-
- /* category 2: */
- OPC(2, OPC_ADD_F, add.f),
- OPC(2, OPC_MIN_F, min.f),
- OPC(2, OPC_MAX_F, max.f),
- OPC(2, OPC_MUL_F, mul.f),
- OPC(2, OPC_SIGN_F, sign.f),
- OPC(2, OPC_CMPS_F, cmps.f),
- OPC(2, OPC_ABSNEG_F, absneg.f),
- OPC(2, OPC_CMPV_F, cmpv.f),
- OPC(2, OPC_FLOOR_F, floor.f),
- OPC(2, OPC_CEIL_F, ceil.f),
- OPC(2, OPC_RNDNE_F, rndne.f),
- OPC(2, OPC_RNDAZ_F, rndaz.f),
- OPC(2, OPC_TRUNC_F, trunc.f),
- OPC(2, OPC_ADD_U, add.u),
- OPC(2, OPC_ADD_S, add.s),
- OPC(2, OPC_SUB_U, sub.u),
- OPC(2, OPC_SUB_S, sub.s),
- OPC(2, OPC_CMPS_U, cmps.u),
- OPC(2, OPC_CMPS_S, cmps.s),
- OPC(2, OPC_MIN_U, min.u),
- OPC(2, OPC_MIN_S, min.s),
- OPC(2, OPC_MAX_U, max.u),
- OPC(2, OPC_MAX_S, max.s),
- OPC(2, OPC_ABSNEG_S, absneg.s),
- OPC(2, OPC_AND_B, and.b),
- OPC(2, OPC_OR_B, or.b),
- OPC(2, OPC_NOT_B, not.b),
- OPC(2, OPC_XOR_B, xor.b),
- OPC(2, OPC_CMPV_U, cmpv.u),
- OPC(2, OPC_CMPV_S, cmpv.s),
- OPC(2, OPC_MUL_U24, mul.u24),
- OPC(2, OPC_MUL_S24, mul.s24),
- OPC(2, OPC_MULL_U, mull.u),
- OPC(2, OPC_BFREV_B, bfrev.b),
- OPC(2, OPC_CLZ_S, clz.s),
- OPC(2, OPC_CLZ_B, clz.b),
- OPC(2, OPC_SHL_B, shl.b),
- OPC(2, OPC_SHR_B, shr.b),
- OPC(2, OPC_ASHR_B, ashr.b),
- OPC(2, OPC_BARY_F, bary.f),
- OPC(2, OPC_MGEN_B, mgen.b),
- OPC(2, OPC_GETBIT_B, getbit.b),
- OPC(2, OPC_SETRM, setrm),
- OPC(2, OPC_CBITS_B, cbits.b),
- OPC(2, OPC_SHB, shb),
- OPC(2, OPC_MSAD, msad),
-
- /* category 3: */
- OPC(3, OPC_MAD_U16, mad.u16),
- OPC(3, OPC_MADSH_U16, madsh.u16),
- OPC(3, OPC_MAD_S16, mad.s16),
- OPC(3, OPC_MADSH_M16, madsh.m16),
- OPC(3, OPC_MAD_U24, mad.u24),
- OPC(3, OPC_MAD_S24, mad.s24),
- OPC(3, OPC_MAD_F16, mad.f16),
- OPC(3, OPC_MAD_F32, mad.f32),
- OPC(3, OPC_SEL_B16, sel.b16),
- OPC(3, OPC_SEL_B32, sel.b32),
- OPC(3, OPC_SEL_S16, sel.s16),
- OPC(3, OPC_SEL_S32, sel.s32),
- OPC(3, OPC_SEL_F16, sel.f16),
- OPC(3, OPC_SEL_F32, sel.f32),
- OPC(3, OPC_SAD_S16, sad.s16),
- OPC(3, OPC_SAD_S32, sad.s32),
- OPC(3, OPC_SHLG_B16, shlg.b16),
-
- /* category 4: */
- OPC(4, OPC_RCP, rcp),
- OPC(4, OPC_RSQ, rsq),
- OPC(4, OPC_LOG2, log2),
- OPC(4, OPC_EXP2, exp2),
- OPC(4, OPC_SIN, sin),
- OPC(4, OPC_COS, cos),
- OPC(4, OPC_SQRT, sqrt),
- OPC(4, OPC_HRSQ, hrsq),
- OPC(4, OPC_HLOG2, hlog2),
- OPC(4, OPC_HEXP2, hexp2),
-
- /* category 5: */
- OPC(5, OPC_ISAM, isam),
- OPC(5, OPC_ISAML, isaml),
- OPC(5, OPC_ISAMM, isamm),
- OPC(5, OPC_SAM, sam),
- OPC(5, OPC_SAMB, samb),
- OPC(5, OPC_SAML, saml),
- OPC(5, OPC_SAMGQ, samgq),
- OPC(5, OPC_GETLOD, getlod),
- OPC(5, OPC_CONV, conv),
- OPC(5, OPC_CONVM, convm),
- OPC(5, OPC_GETSIZE, getsize),
- OPC(5, OPC_GETBUF, getbuf),
- OPC(5, OPC_GETPOS, getpos),
- OPC(5, OPC_GETINFO, getinfo),
- OPC(5, OPC_DSX, dsx),
- OPC(5, OPC_DSY, dsy),
- OPC(5, OPC_GATHER4R, gather4r),
- OPC(5, OPC_GATHER4G, gather4g),
- OPC(5, OPC_GATHER4B, gather4b),
- OPC(5, OPC_GATHER4A, gather4a),
- OPC(5, OPC_SAMGP0, samgp0),
- OPC(5, OPC_SAMGP1, samgp1),
- OPC(5, OPC_SAMGP2, samgp2),
- OPC(5, OPC_SAMGP3, samgp3),
- OPC(5, OPC_DSXPP_1, dsxpp.1),
- OPC(5, OPC_DSYPP_1, dsypp.1),
- OPC(5, OPC_RGETPOS, rgetpos),
- OPC(5, OPC_RGETINFO, rgetinfo),
- /* macros are needed here for ir3_print */
- OPC(5, OPC_DSXPP_MACRO, dsxpp.macro),
- OPC(5, OPC_DSYPP_MACRO, dsypp.macro),
-
-
- /* category 6: */
- OPC(6, OPC_LDG, ldg),
- OPC(6, OPC_LDG_A, ldg.a),
- OPC(6, OPC_LDL, ldl),
- OPC(6, OPC_LDP, ldp),
- OPC(6, OPC_STG, stg),
- OPC(6, OPC_STG_A, stg.a),
- OPC(6, OPC_STL, stl),
- OPC(6, OPC_STP, stp),
- OPC(6, OPC_LDIB, ldib),
- OPC(6, OPC_G2L, g2l),
- OPC(6, OPC_L2G, l2g),
- OPC(6, OPC_PREFETCH, prefetch),
- OPC(6, OPC_LDLW, ldlw),
- OPC(6, OPC_STLW, stlw),
- OPC(6, OPC_RESFMT, resfmt),
- OPC(6, OPC_RESINFO, resinfo),
- OPC(6, OPC_ATOMIC_ADD, atomic.add),
- OPC(6, OPC_ATOMIC_SUB, atomic.sub),
- OPC(6, OPC_ATOMIC_XCHG, atomic.xchg),
- OPC(6, OPC_ATOMIC_INC, atomic.inc),
- OPC(6, OPC_ATOMIC_DEC, atomic.dec),
- OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
- OPC(6, OPC_ATOMIC_MIN, atomic.min),
- OPC(6, OPC_ATOMIC_MAX, atomic.max),
- OPC(6, OPC_ATOMIC_AND, atomic.and),
- OPC(6, OPC_ATOMIC_OR, atomic.or),
- OPC(6, OPC_ATOMIC_XOR, atomic.xor),
- OPC(6, OPC_LDGB, ldgb),
- OPC(6, OPC_STGB, stgb),
- OPC(6, OPC_STIB, stib),
- OPC(6, OPC_LDC, ldc),
- OPC(6, OPC_LDLV, ldlv),
- OPC(6, OPC_PIPR, pipr),
- OPC(6, OPC_PIPC, pipc),
- OPC(6, OPC_EMIT2, emit),
- OPC(6, OPC_ENDLS, endls),
- OPC(6, OPC_GETSPID, getspid),
- OPC(6, OPC_GETWID, getwid),
-
- OPC(7, OPC_BAR, bar),
- OPC(7, OPC_FENCE, fence),
-
+ /* clang-format off */
+ /* category 0: */
+ OPC(0, OPC_NOP, nop),
+ OPC(0, OPC_B, b),
+ OPC(0, OPC_JUMP, jump),
+ OPC(0, OPC_CALL, call),
+ OPC(0, OPC_RET, ret),
+ OPC(0, OPC_KILL, kill),
+ OPC(0, OPC_DEMOTE, demote),
+ OPC(0, OPC_END, end),
+ OPC(0, OPC_EMIT, emit),
+ OPC(0, OPC_CUT, cut),
+ OPC(0, OPC_CHMASK, chmask),
+ OPC(0, OPC_CHSH, chsh),
+ OPC(0, OPC_FLOW_REV, flow_rev),
+ OPC(0, OPC_PREDT, predt),
+ OPC(0, OPC_PREDF, predf),
+ OPC(0, OPC_PREDE, prede),
+ OPC(0, OPC_BKT, bkt),
+ OPC(0, OPC_STKS, stks),
+ OPC(0, OPC_STKR, stkr),
+ OPC(0, OPC_XSET, xset),
+ OPC(0, OPC_XCLR, xclr),
+ OPC(0, OPC_GETONE, getone),
+ OPC(0, OPC_DBG, dbg),
+ OPC(0, OPC_SHPS, shps),
+ OPC(0, OPC_SHPE, shpe),
+
+ /* category 1: */
+ OPC(1, OPC_MOV, ),
+ OPC(1, OPC_MOVMSK, movmsk),
+ OPC(1, OPC_SWZ, swz),
+ OPC(1, OPC_SCT, sct),
+ OPC(1, OPC_GAT, gat),
+ OPC(1, OPC_BALLOT_MACRO, ballot.macro),
+ OPC(1, OPC_ANY_MACRO, any.macro),
+ OPC(1, OPC_ALL_MACRO, all.macro),
+ OPC(1, OPC_ELECT_MACRO, elect.macro),
+ OPC(1, OPC_READ_COND_MACRO, read_cond.macro),
+ OPC(1, OPC_READ_FIRST_MACRO, read_first.macro),
+ OPC(1, OPC_SWZ_SHARED_MACRO, swz_shared.macro),
+
+ /* category 2: */
+ OPC(2, OPC_ADD_F, add.f),
+ OPC(2, OPC_MIN_F, min.f),
+ OPC(2, OPC_MAX_F, max.f),
+ OPC(2, OPC_MUL_F, mul.f),
+ OPC(2, OPC_SIGN_F, sign.f),
+ OPC(2, OPC_CMPS_F, cmps.f),
+ OPC(2, OPC_ABSNEG_F, absneg.f),
+ OPC(2, OPC_CMPV_F, cmpv.f),
+ OPC(2, OPC_FLOOR_F, floor.f),
+ OPC(2, OPC_CEIL_F, ceil.f),
+ OPC(2, OPC_RNDNE_F, rndne.f),
+ OPC(2, OPC_RNDAZ_F, rndaz.f),
+ OPC(2, OPC_TRUNC_F, trunc.f),
+ OPC(2, OPC_ADD_U, add.u),
+ OPC(2, OPC_ADD_S, add.s),
+ OPC(2, OPC_SUB_U, sub.u),
+ OPC(2, OPC_SUB_S, sub.s),
+ OPC(2, OPC_CMPS_U, cmps.u),
+ OPC(2, OPC_CMPS_S, cmps.s),
+ OPC(2, OPC_MIN_U, min.u),
+ OPC(2, OPC_MIN_S, min.s),
+ OPC(2, OPC_MAX_U, max.u),
+ OPC(2, OPC_MAX_S, max.s),
+ OPC(2, OPC_ABSNEG_S, absneg.s),
+ OPC(2, OPC_AND_B, and.b),
+ OPC(2, OPC_OR_B, or.b),
+ OPC(2, OPC_NOT_B, not.b),
+ OPC(2, OPC_XOR_B, xor.b),
+ OPC(2, OPC_CMPV_U, cmpv.u),
+ OPC(2, OPC_CMPV_S, cmpv.s),
+ OPC(2, OPC_MUL_U24, mul.u24),
+ OPC(2, OPC_MUL_S24, mul.s24),
+ OPC(2, OPC_MULL_U, mull.u),
+ OPC(2, OPC_BFREV_B, bfrev.b),
+ OPC(2, OPC_CLZ_S, clz.s),
+ OPC(2, OPC_CLZ_B, clz.b),
+ OPC(2, OPC_SHL_B, shl.b),
+ OPC(2, OPC_SHR_B, shr.b),
+ OPC(2, OPC_ASHR_B, ashr.b),
+ OPC(2, OPC_BARY_F, bary.f),
+ OPC(2, OPC_MGEN_B, mgen.b),
+ OPC(2, OPC_GETBIT_B, getbit.b),
+ OPC(2, OPC_SETRM, setrm),
+ OPC(2, OPC_CBITS_B, cbits.b),
+ OPC(2, OPC_SHB, shb),
+ OPC(2, OPC_MSAD, msad),
+
+ /* category 3: */
+ OPC(3, OPC_MAD_U16, mad.u16),
+ OPC(3, OPC_MADSH_U16, madsh.u16),
+ OPC(3, OPC_MAD_S16, mad.s16),
+ OPC(3, OPC_MADSH_M16, madsh.m16),
+ OPC(3, OPC_MAD_U24, mad.u24),
+ OPC(3, OPC_MAD_S24, mad.s24),
+ OPC(3, OPC_MAD_F16, mad.f16),
+ OPC(3, OPC_MAD_F32, mad.f32),
+ OPC(3, OPC_SEL_B16, sel.b16),
+ OPC(3, OPC_SEL_B32, sel.b32),
+ OPC(3, OPC_SEL_S16, sel.s16),
+ OPC(3, OPC_SEL_S32, sel.s32),
+ OPC(3, OPC_SEL_F16, sel.f16),
+ OPC(3, OPC_SEL_F32, sel.f32),
+ OPC(3, OPC_SAD_S16, sad.s16),
+ OPC(3, OPC_SAD_S32, sad.s32),
+ OPC(3, OPC_SHLG_B16, shlg.b16),
+
+ /* category 4: */
+ OPC(4, OPC_RCP, rcp),
+ OPC(4, OPC_RSQ, rsq),
+ OPC(4, OPC_LOG2, log2),
+ OPC(4, OPC_EXP2, exp2),
+ OPC(4, OPC_SIN, sin),
+ OPC(4, OPC_COS, cos),
+ OPC(4, OPC_SQRT, sqrt),
+ OPC(4, OPC_HRSQ, hrsq),
+ OPC(4, OPC_HLOG2, hlog2),
+ OPC(4, OPC_HEXP2, hexp2),
+
+ /* category 5: */
+ OPC(5, OPC_ISAM, isam),
+ OPC(5, OPC_ISAML, isaml),
+ OPC(5, OPC_ISAMM, isamm),
+ OPC(5, OPC_SAM, sam),
+ OPC(5, OPC_SAMB, samb),
+ OPC(5, OPC_SAML, saml),
+ OPC(5, OPC_SAMGQ, samgq),
+ OPC(5, OPC_GETLOD, getlod),
+ OPC(5, OPC_CONV, conv),
+ OPC(5, OPC_CONVM, convm),
+ OPC(5, OPC_GETSIZE, getsize),
+ OPC(5, OPC_GETBUF, getbuf),
+ OPC(5, OPC_GETPOS, getpos),
+ OPC(5, OPC_GETINFO, getinfo),
+ OPC(5, OPC_DSX, dsx),
+ OPC(5, OPC_DSY, dsy),
+ OPC(5, OPC_GATHER4R, gather4r),
+ OPC(5, OPC_GATHER4G, gather4g),
+ OPC(5, OPC_GATHER4B, gather4b),
+ OPC(5, OPC_GATHER4A, gather4a),
+ OPC(5, OPC_SAMGP0, samgp0),
+ OPC(5, OPC_SAMGP1, samgp1),
+ OPC(5, OPC_SAMGP2, samgp2),
+ OPC(5, OPC_SAMGP3, samgp3),
+ OPC(5, OPC_DSXPP_1, dsxpp.1),
+ OPC(5, OPC_DSYPP_1, dsypp.1),
+ OPC(5, OPC_RGETPOS, rgetpos),
+ OPC(5, OPC_RGETINFO, rgetinfo),
+ /* macros are needed here for ir3_print */
+ OPC(5, OPC_DSXPP_MACRO, dsxpp.macro),
+ OPC(5, OPC_DSYPP_MACRO, dsypp.macro),
+
+
+ /* category 6: */
+ OPC(6, OPC_LDG, ldg),
+ OPC(6, OPC_LDG_A, ldg.a),
+ OPC(6, OPC_LDL, ldl),
+ OPC(6, OPC_LDP, ldp),
+ OPC(6, OPC_STG, stg),
+ OPC(6, OPC_STG_A, stg.a),
+ OPC(6, OPC_STL, stl),
+ OPC(6, OPC_STP, stp),
+ OPC(6, OPC_LDIB, ldib),
+ OPC(6, OPC_G2L, g2l),
+ OPC(6, OPC_L2G, l2g),
+ OPC(6, OPC_PREFETCH, prefetch),
+ OPC(6, OPC_LDLW, ldlw),
+ OPC(6, OPC_STLW, stlw),
+ OPC(6, OPC_RESFMT, resfmt),
+ OPC(6, OPC_RESINFO, resinfo),
+ OPC(6, OPC_ATOMIC_ADD, atomic.add),
+ OPC(6, OPC_ATOMIC_SUB, atomic.sub),
+ OPC(6, OPC_ATOMIC_XCHG, atomic.xchg),
+ OPC(6, OPC_ATOMIC_INC, atomic.inc),
+ OPC(6, OPC_ATOMIC_DEC, atomic.dec),
+ OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
+ OPC(6, OPC_ATOMIC_MIN, atomic.min),
+ OPC(6, OPC_ATOMIC_MAX, atomic.max),
+ OPC(6, OPC_ATOMIC_AND, atomic.and),
+ OPC(6, OPC_ATOMIC_OR, atomic.or),
+ OPC(6, OPC_ATOMIC_XOR, atomic.xor),
+ OPC(6, OPC_LDGB, ldgb),
+ OPC(6, OPC_STGB, stgb),
+ OPC(6, OPC_STIB, stib),
+ OPC(6, OPC_LDC, ldc),
+ OPC(6, OPC_LDLV, ldlv),
+ OPC(6, OPC_PIPR, pipr),
+ OPC(6, OPC_PIPC, pipc),
+ OPC(6, OPC_EMIT2, emit),
+ OPC(6, OPC_ENDLS, endls),
+ OPC(6, OPC_GETSPID, getspid),
+ OPC(6, OPC_GETWID, getwid),
+
+ OPC(7, OPC_BAR, bar),
+ OPC(7, OPC_FENCE, fence),
+/* clang-format on */
#undef OPC
};
#include <stdbool.h>
#include <assert.h>
+/* clang-format off */
void ir3_assert_handler(const char *expr, const char *file, int line,
- const char *func) __attribute__((weak)) __attribute__ ((__noreturn__));
+ const char *func) __attribute__((weak)) __attribute__((__noreturn__));
+/* clang-format on */
/* A wrapper for assert() that allows overriding handling of a failed
* assert. This is needed for tools like crashdec which can want to
#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc)
+/* clang-format off */
typedef enum {
- /* category 0: */
- OPC_NOP = _OPC(0, 0),
- OPC_B = _OPC(0, 1),
- OPC_JUMP = _OPC(0, 2),
- OPC_CALL = _OPC(0, 3),
- OPC_RET = _OPC(0, 4),
- OPC_KILL = _OPC(0, 5),
- OPC_END = _OPC(0, 6),
- OPC_EMIT = _OPC(0, 7),
- OPC_CUT = _OPC(0, 8),
- OPC_CHMASK = _OPC(0, 9),
- OPC_CHSH = _OPC(0, 10),
- OPC_FLOW_REV = _OPC(0, 11),
-
- OPC_BKT = _OPC(0, 16),
- OPC_STKS = _OPC(0, 17),
- OPC_STKR = _OPC(0, 18),
- OPC_XSET = _OPC(0, 19),
- OPC_XCLR = _OPC(0, 20),
- OPC_GETONE = _OPC(0, 21),
- OPC_DBG = _OPC(0, 22),
- OPC_SHPS = _OPC(0, 23), /* shader prologue start */
- OPC_SHPE = _OPC(0, 24), /* shader prologue end */
-
- OPC_PREDT = _OPC(0, 29), /* predicated true */
- OPC_PREDF = _OPC(0, 30), /* predicated false */
- OPC_PREDE = _OPC(0, 31), /* predicated end */
-
- /* Logical opcodes for different branch instruction variations: */
- OPC_BR = _OPC(0, 40),
- OPC_BRAO = _OPC(0, 41),
- OPC_BRAA = _OPC(0, 42),
- OPC_BRAC = _OPC(0, 43),
- OPC_BANY = _OPC(0, 44),
- OPC_BALL = _OPC(0, 45),
- OPC_BRAX = _OPC(0, 46),
-
- /* Logical opcode to distinguish kill and demote */
- OPC_DEMOTE = _OPC(0, 47),
-
- /* category 1: */
- OPC_MOV = _OPC(1, 0),
- OPC_MOVP = _OPC(1, 1),
- /* swz, gat, sct */
- OPC_MOVMSK = _OPC(1, 3),
-
- /* Virtual opcodes for instructions differentiated via a "sub-opcode" that
- * replaces the repeat field:
- */
- OPC_SWZ = _OPC(1, 4),
- OPC_GAT = _OPC(1, 5),
- OPC_SCT = _OPC(1, 6),
-
- /* Logical opcodes for different variants of mov: */
- OPC_MOV_IMMED = _OPC(1, 40),
- OPC_MOV_CONST = _OPC(1, 41),
- OPC_MOV_GPR = _OPC(1, 42),
- OPC_MOV_RELGPR = _OPC(1, 43),
- OPC_MOV_RELCONST = _OPC(1, 44),
-
- /* Macros that expand to an if statement + move */
- OPC_BALLOT_MACRO = _OPC(1, 50),
- OPC_ANY_MACRO = _OPC(1, 51),
- OPC_ALL_MACRO = _OPC(1, 52),
- OPC_ELECT_MACRO = _OPC(1, 53),
- OPC_READ_COND_MACRO = _OPC(1, 54),
- OPC_READ_FIRST_MACRO = _OPC(1, 55),
- OPC_SWZ_SHARED_MACRO = _OPC(1, 56),
-
- /* category 2: */
- OPC_ADD_F = _OPC(2, 0),
- OPC_MIN_F = _OPC(2, 1),
- OPC_MAX_F = _OPC(2, 2),
- OPC_MUL_F = _OPC(2, 3),
- OPC_SIGN_F = _OPC(2, 4),
- OPC_CMPS_F = _OPC(2, 5),
- OPC_ABSNEG_F = _OPC(2, 6),
- OPC_CMPV_F = _OPC(2, 7),
- /* 8 - invalid */
- OPC_FLOOR_F = _OPC(2, 9),
- OPC_CEIL_F = _OPC(2, 10),
- OPC_RNDNE_F = _OPC(2, 11),
- OPC_RNDAZ_F = _OPC(2, 12),
- OPC_TRUNC_F = _OPC(2, 13),
- /* 14-15 - invalid */
- OPC_ADD_U = _OPC(2, 16),
- OPC_ADD_S = _OPC(2, 17),
- OPC_SUB_U = _OPC(2, 18),
- OPC_SUB_S = _OPC(2, 19),
- OPC_CMPS_U = _OPC(2, 20),
- OPC_CMPS_S = _OPC(2, 21),
- OPC_MIN_U = _OPC(2, 22),
- OPC_MIN_S = _OPC(2, 23),
- OPC_MAX_U = _OPC(2, 24),
- OPC_MAX_S = _OPC(2, 25),
- OPC_ABSNEG_S = _OPC(2, 26),
- /* 27 - invalid */
- OPC_AND_B = _OPC(2, 28),
- OPC_OR_B = _OPC(2, 29),
- OPC_NOT_B = _OPC(2, 30),
- OPC_XOR_B = _OPC(2, 31),
- /* 32 - invalid */
- OPC_CMPV_U = _OPC(2, 33),
- OPC_CMPV_S = _OPC(2, 34),
- /* 35-47 - invalid */
- OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */
- OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
- OPC_MULL_U = _OPC(2, 50),
- OPC_BFREV_B = _OPC(2, 51),
- OPC_CLZ_S = _OPC(2, 52),
- OPC_CLZ_B = _OPC(2, 53),
- OPC_SHL_B = _OPC(2, 54),
- OPC_SHR_B = _OPC(2, 55),
- OPC_ASHR_B = _OPC(2, 56),
- OPC_BARY_F = _OPC(2, 57),
- OPC_MGEN_B = _OPC(2, 58),
- OPC_GETBIT_B = _OPC(2, 59),
- OPC_SETRM = _OPC(2, 60),
- OPC_CBITS_B = _OPC(2, 61),
- OPC_SHB = _OPC(2, 62),
- OPC_MSAD = _OPC(2, 63),
-
- /* category 3: */
- OPC_MAD_U16 = _OPC(3, 0),
- OPC_MADSH_U16 = _OPC(3, 1),
- OPC_MAD_S16 = _OPC(3, 2),
- OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
- OPC_MAD_U24 = _OPC(3, 4),
- OPC_MAD_S24 = _OPC(3, 5),
- OPC_MAD_F16 = _OPC(3, 6),
- OPC_MAD_F32 = _OPC(3, 7),
- OPC_SEL_B16 = _OPC(3, 8),
- OPC_SEL_B32 = _OPC(3, 9),
- OPC_SEL_S16 = _OPC(3, 10),
- OPC_SEL_S32 = _OPC(3, 11),
- OPC_SEL_F16 = _OPC(3, 12),
- OPC_SEL_F32 = _OPC(3, 13),
- OPC_SAD_S16 = _OPC(3, 14),
- OPC_SAD_S32 = _OPC(3, 15),
- OPC_SHLG_B16 = _OPC(3, 16),
-
- /* category 4: */
- OPC_RCP = _OPC(4, 0),
- OPC_RSQ = _OPC(4, 1),
- OPC_LOG2 = _OPC(4, 2),
- OPC_EXP2 = _OPC(4, 3),
- OPC_SIN = _OPC(4, 4),
- OPC_COS = _OPC(4, 5),
- OPC_SQRT = _OPC(4, 6),
- /* NOTE that these are 8+opc from their highp equivs, so it's possible
- * that the high order bit in the opc field has been repurposed for
- * half-precision use? But note that other ops (rcp/lsin/cos/sqrt)
- * still use the same opc as highp
- */
- OPC_HRSQ = _OPC(4, 9),
- OPC_HLOG2 = _OPC(4, 10),
- OPC_HEXP2 = _OPC(4, 11),
-
- /* category 5: */
- OPC_ISAM = _OPC(5, 0),
- OPC_ISAML = _OPC(5, 1),
- OPC_ISAMM = _OPC(5, 2),
- OPC_SAM = _OPC(5, 3),
- OPC_SAMB = _OPC(5, 4),
- OPC_SAML = _OPC(5, 5),
- OPC_SAMGQ = _OPC(5, 6),
- OPC_GETLOD = _OPC(5, 7),
- OPC_CONV = _OPC(5, 8),
- OPC_CONVM = _OPC(5, 9),
- OPC_GETSIZE = _OPC(5, 10),
- OPC_GETBUF = _OPC(5, 11),
- OPC_GETPOS = _OPC(5, 12),
- OPC_GETINFO = _OPC(5, 13),
- OPC_DSX = _OPC(5, 14),
- OPC_DSY = _OPC(5, 15),
- OPC_GATHER4R = _OPC(5, 16),
- OPC_GATHER4G = _OPC(5, 17),
- OPC_GATHER4B = _OPC(5, 18),
- OPC_GATHER4A = _OPC(5, 19),
- OPC_SAMGP0 = _OPC(5, 20),
- OPC_SAMGP1 = _OPC(5, 21),
- OPC_SAMGP2 = _OPC(5, 22),
- OPC_SAMGP3 = _OPC(5, 23),
- OPC_DSXPP_1 = _OPC(5, 24),
- OPC_DSYPP_1 = _OPC(5, 25),
- OPC_RGETPOS = _OPC(5, 26),
- OPC_RGETINFO = _OPC(5, 27),
- /* cat5 meta instructions, placed above the cat5 opc field's size */
- OPC_DSXPP_MACRO = _OPC(5, 32),
- OPC_DSYPP_MACRO = _OPC(5, 33),
-
- /* category 6: */
- OPC_LDG = _OPC(6, 0), /* load-global */
- OPC_LDL = _OPC(6, 1),
- OPC_LDP = _OPC(6, 2),
- OPC_STG = _OPC(6, 3), /* store-global */
- OPC_STL = _OPC(6, 4),
- OPC_STP = _OPC(6, 5),
- OPC_LDIB = _OPC(6, 6),
- OPC_G2L = _OPC(6, 7),
- OPC_L2G = _OPC(6, 8),
- OPC_PREFETCH = _OPC(6, 9),
- OPC_LDLW = _OPC(6, 10),
- OPC_STLW = _OPC(6, 11),
- OPC_RESFMT = _OPC(6, 14),
- OPC_RESINFO = _OPC(6, 15),
- OPC_ATOMIC_ADD = _OPC(6, 16),
- OPC_ATOMIC_SUB = _OPC(6, 17),
- OPC_ATOMIC_XCHG = _OPC(6, 18),
- OPC_ATOMIC_INC = _OPC(6, 19),
- OPC_ATOMIC_DEC = _OPC(6, 20),
- OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
- OPC_ATOMIC_MIN = _OPC(6, 22),
- OPC_ATOMIC_MAX = _OPC(6, 23),
- OPC_ATOMIC_AND = _OPC(6, 24),
- OPC_ATOMIC_OR = _OPC(6, 25),
- OPC_ATOMIC_XOR = _OPC(6, 26),
- OPC_LDGB = _OPC(6, 27),
- OPC_STGB = _OPC(6, 28),
- OPC_STIB = _OPC(6, 29),
- OPC_LDC = _OPC(6, 30),
- OPC_LDLV = _OPC(6, 31),
- OPC_PIPR = _OPC(6, 32), /* ??? */
- OPC_PIPC = _OPC(6, 33), /* ??? */
- OPC_EMIT2 = _OPC(6, 34), /* ??? */
- OPC_ENDLS = _OPC(6, 35), /* ??? */
- OPC_GETSPID = _OPC(6, 36), /* SP ID */
- OPC_GETWID = _OPC(6, 37), /* wavefront ID */
-
- /* Logical opcodes for things that differ in a6xx+ */
- OPC_STC = _OPC(6, 40),
- OPC_RESINFO_B = _OPC(6, 41),
- OPC_LDIB_B = _OPC(6, 42),
- OPC_STIB_B = _OPC(6, 43),
-
- /* Logical opcodes for different atomic instruction variations: */
- OPC_ATOMIC_B_ADD = _OPC(6, 44),
- OPC_ATOMIC_B_SUB = _OPC(6, 45),
- OPC_ATOMIC_B_XCHG = _OPC(6, 46),
- OPC_ATOMIC_B_INC = _OPC(6, 47),
- OPC_ATOMIC_B_DEC = _OPC(6, 48),
- OPC_ATOMIC_B_CMPXCHG = _OPC(6, 49),
- OPC_ATOMIC_B_MIN = _OPC(6, 50),
- OPC_ATOMIC_B_MAX = _OPC(6, 51),
- OPC_ATOMIC_B_AND = _OPC(6, 52),
- OPC_ATOMIC_B_OR = _OPC(6, 53),
- OPC_ATOMIC_B_XOR = _OPC(6, 54),
-
- OPC_LDG_A = _OPC(6, 55),
- OPC_STG_A = _OPC(6, 56),
-
- /* category 7: */
- OPC_BAR = _OPC(7, 0),
- OPC_FENCE = _OPC(7, 1),
-
- /* meta instructions (category -1): */
- /* placeholder instr to mark shader inputs: */
- OPC_META_INPUT = _OPC(-1, 0),
- /* The "collect" and "split" instructions are used for keeping
- * track of instructions that write to multiple dst registers
- * (split) like texture sample instructions, or read multiple
- * consecutive scalar registers (collect) (bary.f, texture samp)
- *
- * A "split" extracts a scalar component from a vecN, and a
- * "collect" gathers multiple scalar components into a vecN
- */
- OPC_META_SPLIT = _OPC(-1, 2),
- OPC_META_COLLECT = _OPC(-1, 3),
-
- /* placeholder for texture fetches that run before FS invocation
- * starts:
- */
- OPC_META_TEX_PREFETCH = _OPC(-1, 4),
-
- /* Parallel copies have multiple destinations, and copy each destination
- * to its corresponding source. This happens "in parallel," meaning that
- * it happens as-if every source is read first and then every destination
- * is stored. These are produced in RA when register shuffling is
- * required, and then lowered away immediately afterwards.
- */
- OPC_META_PARALLEL_COPY = _OPC(-1, 5),
- OPC_META_PHI = _OPC(-1, 6),
+ /* category 0: */
+ OPC_NOP = _OPC(0, 0),
+ OPC_B = _OPC(0, 1),
+ OPC_JUMP = _OPC(0, 2),
+ OPC_CALL = _OPC(0, 3),
+ OPC_RET = _OPC(0, 4),
+ OPC_KILL = _OPC(0, 5),
+ OPC_END = _OPC(0, 6),
+ OPC_EMIT = _OPC(0, 7),
+ OPC_CUT = _OPC(0, 8),
+ OPC_CHMASK = _OPC(0, 9),
+ OPC_CHSH = _OPC(0, 10),
+ OPC_FLOW_REV = _OPC(0, 11),
+
+ OPC_BKT = _OPC(0, 16),
+ OPC_STKS = _OPC(0, 17),
+ OPC_STKR = _OPC(0, 18),
+ OPC_XSET = _OPC(0, 19),
+ OPC_XCLR = _OPC(0, 20),
+ OPC_GETONE = _OPC(0, 21),
+ OPC_DBG = _OPC(0, 22),
+ OPC_SHPS = _OPC(0, 23), /* shader prologue start */
+ OPC_SHPE = _OPC(0, 24), /* shader prologue end */
+
+ OPC_PREDT = _OPC(0, 29), /* predicated true */
+ OPC_PREDF = _OPC(0, 30), /* predicated false */
+ OPC_PREDE = _OPC(0, 31), /* predicated end */
+
+ /* Logical opcodes for different branch instruction variations: */
+ OPC_BR = _OPC(0, 40),
+ OPC_BRAO = _OPC(0, 41),
+ OPC_BRAA = _OPC(0, 42),
+ OPC_BRAC = _OPC(0, 43),
+ OPC_BANY = _OPC(0, 44),
+ OPC_BALL = _OPC(0, 45),
+ OPC_BRAX = _OPC(0, 46),
+
+ /* Logical opcode to distinguish kill and demote */
+ OPC_DEMOTE = _OPC(0, 47),
+
+ /* category 1: */
+ OPC_MOV = _OPC(1, 0),
+ OPC_MOVP = _OPC(1, 1),
+ /* swz, gat, sct */
+ OPC_MOVMSK = _OPC(1, 3),
+
+ /* Virtual opcodes for instructions differentiated via a "sub-opcode" that
+ * replaces the repeat field:
+ */
+ OPC_SWZ = _OPC(1, 4),
+ OPC_GAT = _OPC(1, 5),
+ OPC_SCT = _OPC(1, 6),
+
+ /* Logical opcodes for different variants of mov: */
+ OPC_MOV_IMMED = _OPC(1, 40),
+ OPC_MOV_CONST = _OPC(1, 41),
+ OPC_MOV_GPR = _OPC(1, 42),
+ OPC_MOV_RELGPR = _OPC(1, 43),
+ OPC_MOV_RELCONST = _OPC(1, 44),
+
+ /* Macros that expand to an if statement + move */
+ OPC_BALLOT_MACRO = _OPC(1, 50),
+ OPC_ANY_MACRO = _OPC(1, 51),
+ OPC_ALL_MACRO = _OPC(1, 52),
+ OPC_ELECT_MACRO = _OPC(1, 53),
+ OPC_READ_COND_MACRO = _OPC(1, 54),
+ OPC_READ_FIRST_MACRO = _OPC(1, 55),
+ OPC_SWZ_SHARED_MACRO = _OPC(1, 56),
+
+ /* category 2: */
+ OPC_ADD_F = _OPC(2, 0),
+ OPC_MIN_F = _OPC(2, 1),
+ OPC_MAX_F = _OPC(2, 2),
+ OPC_MUL_F = _OPC(2, 3),
+ OPC_SIGN_F = _OPC(2, 4),
+ OPC_CMPS_F = _OPC(2, 5),
+ OPC_ABSNEG_F = _OPC(2, 6),
+ OPC_CMPV_F = _OPC(2, 7),
+ /* 8 - invalid */
+ OPC_FLOOR_F = _OPC(2, 9),
+ OPC_CEIL_F = _OPC(2, 10),
+ OPC_RNDNE_F = _OPC(2, 11),
+ OPC_RNDAZ_F = _OPC(2, 12),
+ OPC_TRUNC_F = _OPC(2, 13),
+ /* 14-15 - invalid */
+ OPC_ADD_U = _OPC(2, 16),
+ OPC_ADD_S = _OPC(2, 17),
+ OPC_SUB_U = _OPC(2, 18),
+ OPC_SUB_S = _OPC(2, 19),
+ OPC_CMPS_U = _OPC(2, 20),
+ OPC_CMPS_S = _OPC(2, 21),
+ OPC_MIN_U = _OPC(2, 22),
+ OPC_MIN_S = _OPC(2, 23),
+ OPC_MAX_U = _OPC(2, 24),
+ OPC_MAX_S = _OPC(2, 25),
+ OPC_ABSNEG_S = _OPC(2, 26),
+ /* 27 - invalid */
+ OPC_AND_B = _OPC(2, 28),
+ OPC_OR_B = _OPC(2, 29),
+ OPC_NOT_B = _OPC(2, 30),
+ OPC_XOR_B = _OPC(2, 31),
+ /* 32 - invalid */
+ OPC_CMPV_U = _OPC(2, 33),
+ OPC_CMPV_S = _OPC(2, 34),
+ /* 35-47 - invalid */
+ OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */
+ OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
+ OPC_MULL_U = _OPC(2, 50),
+ OPC_BFREV_B = _OPC(2, 51),
+ OPC_CLZ_S = _OPC(2, 52),
+ OPC_CLZ_B = _OPC(2, 53),
+ OPC_SHL_B = _OPC(2, 54),
+ OPC_SHR_B = _OPC(2, 55),
+ OPC_ASHR_B = _OPC(2, 56),
+ OPC_BARY_F = _OPC(2, 57),
+ OPC_MGEN_B = _OPC(2, 58),
+ OPC_GETBIT_B = _OPC(2, 59),
+ OPC_SETRM = _OPC(2, 60),
+ OPC_CBITS_B = _OPC(2, 61),
+ OPC_SHB = _OPC(2, 62),
+ OPC_MSAD = _OPC(2, 63),
+
+ /* category 3: */
+ OPC_MAD_U16 = _OPC(3, 0),
+ OPC_MADSH_U16 = _OPC(3, 1),
+ OPC_MAD_S16 = _OPC(3, 2),
+ OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
+ OPC_MAD_U24 = _OPC(3, 4),
+ OPC_MAD_S24 = _OPC(3, 5),
+ OPC_MAD_F16 = _OPC(3, 6),
+ OPC_MAD_F32 = _OPC(3, 7),
+ OPC_SEL_B16 = _OPC(3, 8),
+ OPC_SEL_B32 = _OPC(3, 9),
+ OPC_SEL_S16 = _OPC(3, 10),
+ OPC_SEL_S32 = _OPC(3, 11),
+ OPC_SEL_F16 = _OPC(3, 12),
+ OPC_SEL_F32 = _OPC(3, 13),
+ OPC_SAD_S16 = _OPC(3, 14),
+ OPC_SAD_S32 = _OPC(3, 15),
+ OPC_SHLG_B16 = _OPC(3, 16),
+
+ /* category 4: */
+ OPC_RCP = _OPC(4, 0),
+ OPC_RSQ = _OPC(4, 1),
+ OPC_LOG2 = _OPC(4, 2),
+ OPC_EXP2 = _OPC(4, 3),
+ OPC_SIN = _OPC(4, 4),
+ OPC_COS = _OPC(4, 5),
+ OPC_SQRT = _OPC(4, 6),
+ /* NOTE that these are 8+opc from their highp equivalents, so it is
+ * possible that the high order bit in the opc field has been repurposed
+ * for half-precision use. But note that other ops (rcp/sin/cos/sqrt)
+ * still use the same opc as highp
+ */
+ OPC_HRSQ = _OPC(4, 9),
+ OPC_HLOG2 = _OPC(4, 10),
+ OPC_HEXP2 = _OPC(4, 11),
+
+ /* category 5: */
+ OPC_ISAM = _OPC(5, 0),
+ OPC_ISAML = _OPC(5, 1),
+ OPC_ISAMM = _OPC(5, 2),
+ OPC_SAM = _OPC(5, 3),
+ OPC_SAMB = _OPC(5, 4),
+ OPC_SAML = _OPC(5, 5),
+ OPC_SAMGQ = _OPC(5, 6),
+ OPC_GETLOD = _OPC(5, 7),
+ OPC_CONV = _OPC(5, 8),
+ OPC_CONVM = _OPC(5, 9),
+ OPC_GETSIZE = _OPC(5, 10),
+ OPC_GETBUF = _OPC(5, 11),
+ OPC_GETPOS = _OPC(5, 12),
+ OPC_GETINFO = _OPC(5, 13),
+ OPC_DSX = _OPC(5, 14),
+ OPC_DSY = _OPC(5, 15),
+ OPC_GATHER4R = _OPC(5, 16),
+ OPC_GATHER4G = _OPC(5, 17),
+ OPC_GATHER4B = _OPC(5, 18),
+ OPC_GATHER4A = _OPC(5, 19),
+ OPC_SAMGP0 = _OPC(5, 20),
+ OPC_SAMGP1 = _OPC(5, 21),
+ OPC_SAMGP2 = _OPC(5, 22),
+ OPC_SAMGP3 = _OPC(5, 23),
+ OPC_DSXPP_1 = _OPC(5, 24),
+ OPC_DSYPP_1 = _OPC(5, 25),
+ OPC_RGETPOS = _OPC(5, 26),
+ OPC_RGETINFO = _OPC(5, 27),
+ /* cat5 meta instructions, placed above the cat5 opc field's size */
+ OPC_DSXPP_MACRO = _OPC(5, 32),
+ OPC_DSYPP_MACRO = _OPC(5, 33),
+
+ /* category 6: */
+ OPC_LDG = _OPC(6, 0), /* load-global */
+ OPC_LDL = _OPC(6, 1),
+ OPC_LDP = _OPC(6, 2),
+ OPC_STG = _OPC(6, 3), /* store-global */
+ OPC_STL = _OPC(6, 4),
+ OPC_STP = _OPC(6, 5),
+ OPC_LDIB = _OPC(6, 6),
+ OPC_G2L = _OPC(6, 7),
+ OPC_L2G = _OPC(6, 8),
+ OPC_PREFETCH = _OPC(6, 9),
+ OPC_LDLW = _OPC(6, 10),
+ OPC_STLW = _OPC(6, 11),
+ OPC_RESFMT = _OPC(6, 14),
+ OPC_RESINFO = _OPC(6, 15),
+ OPC_ATOMIC_ADD = _OPC(6, 16),
+ OPC_ATOMIC_SUB = _OPC(6, 17),
+ OPC_ATOMIC_XCHG = _OPC(6, 18),
+ OPC_ATOMIC_INC = _OPC(6, 19),
+ OPC_ATOMIC_DEC = _OPC(6, 20),
+ OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
+ OPC_ATOMIC_MIN = _OPC(6, 22),
+ OPC_ATOMIC_MAX = _OPC(6, 23),
+ OPC_ATOMIC_AND = _OPC(6, 24),
+ OPC_ATOMIC_OR = _OPC(6, 25),
+ OPC_ATOMIC_XOR = _OPC(6, 26),
+ OPC_LDGB = _OPC(6, 27),
+ OPC_STGB = _OPC(6, 28),
+ OPC_STIB = _OPC(6, 29),
+ OPC_LDC = _OPC(6, 30),
+ OPC_LDLV = _OPC(6, 31),
+ OPC_PIPR = _OPC(6, 32), /* ??? */
+ OPC_PIPC = _OPC(6, 33), /* ??? */
+ OPC_EMIT2 = _OPC(6, 34), /* ??? */
+ OPC_ENDLS = _OPC(6, 35), /* ??? */
+ OPC_GETSPID = _OPC(6, 36), /* SP ID */
+ OPC_GETWID = _OPC(6, 37), /* wavefront ID */
+
+ /* Logical opcodes for things that differ in a6xx+ */
+ OPC_STC = _OPC(6, 40),
+ OPC_RESINFO_B = _OPC(6, 41),
+ OPC_LDIB_B = _OPC(6, 42),
+ OPC_STIB_B = _OPC(6, 43),
+
+ /* Logical opcodes for different atomic instruction variations: */
+ OPC_ATOMIC_B_ADD = _OPC(6, 44),
+ OPC_ATOMIC_B_SUB = _OPC(6, 45),
+ OPC_ATOMIC_B_XCHG = _OPC(6, 46),
+ OPC_ATOMIC_B_INC = _OPC(6, 47),
+ OPC_ATOMIC_B_DEC = _OPC(6, 48),
+ OPC_ATOMIC_B_CMPXCHG = _OPC(6, 49),
+ OPC_ATOMIC_B_MIN = _OPC(6, 50),
+ OPC_ATOMIC_B_MAX = _OPC(6, 51),
+ OPC_ATOMIC_B_AND = _OPC(6, 52),
+ OPC_ATOMIC_B_OR = _OPC(6, 53),
+ OPC_ATOMIC_B_XOR = _OPC(6, 54),
+
+ OPC_LDG_A = _OPC(6, 55),
+ OPC_STG_A = _OPC(6, 56),
+
+ /* category 7: */
+ OPC_BAR = _OPC(7, 0),
+ OPC_FENCE = _OPC(7, 1),
+
+ /* meta instructions (category -1): */
+ /* placeholder instr to mark shader inputs: */
+ OPC_META_INPUT = _OPC(-1, 0),
+ /* The "collect" and "split" instructions are used for keeping
+ * track of instructions that write to multiple dst registers
+ * (split) like texture sample instructions, or read multiple
+ * consecutive scalar registers (collect) (bary.f, texture samp)
+ *
+ * A "split" extracts a scalar component from a vecN, and a
+ * "collect" gathers multiple scalar components into a vecN
+ */
+ OPC_META_SPLIT = _OPC(-1, 2),
+ OPC_META_COLLECT = _OPC(-1, 3),
+
+ /* placeholder for texture fetches that run before FS invocation
+ * starts:
+ */
+ OPC_META_TEX_PREFETCH = _OPC(-1, 4),
+
+ /* Parallel copies have multiple destinations, and copy each source to
+ * its corresponding destination. This happens "in parallel," meaning that
+ * it happens as-if every source is read first and then every destination
+ * is stored. These are produced in RA when register shuffling is
+ * required, and then lowered away immediately afterwards.
+ */
+ OPC_META_PARALLEL_COPY = _OPC(-1, 5),
+ OPC_META_PHI = _OPC(-1, 6),
} opc_t;
+/* clang-format on */
/* opc_cat(opc): the bits of an opcode value above the low NOPC_BITS, as an
 * int (presumably the category passed to _OPC(cat, op) - confirm against the
 * _OPC definition, which is not visible here).
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
/* opc_op(opc): the low NOPC_BITS bits of an opcode value, as an unsigned. */
#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
/* Zero-valued flag constant; its name fits the IR3_INSTR_##f pasting pattern
 * used by the INSTRnF() helper macros.
 */
#define IR3_INSTR_0 0
-#define __INSTR0(flag, name, opc) \
-static inline struct ir3_instruction * \
-ir3_##name(struct ir3_block *block) \
-{ \
- struct ir3_instruction *instr = \
- ir3_instr_create(block, opc, 1, 0); \
- instr->flags |= flag; \
- return instr; \
-}
-#define INSTR0F(f, name) __INSTR0(IR3_INSTR_##f, name##_##f, OPC_##name)
-#define INSTR0(name) __INSTR0(0, name, OPC_##name)
-
-#define __INSTR1(flag, dst_count, name, opc) \
-static inline struct ir3_instruction * \
-ir3_##name(struct ir3_block *block, \
- struct ir3_instruction *a, unsigned aflags) \
-{ \
- struct ir3_instruction *instr = \
- ir3_instr_create(block, opc, dst_count, 1); \
- for (unsigned i = 0; i < dst_count; i++) \
- __ssa_dst(instr); \
- __ssa_src(instr, a, aflags); \
- instr->flags |= flag; \
- return instr; \
-}
-#define INSTR1F(f, name) __INSTR1(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
-#define INSTR1(name) __INSTR1(0, 1, name, OPC_##name)
-#define INSTR1NODST(name) __INSTR1(0, 0, name, OPC_##name)
-
-#define __INSTR2(flag, name, opc) \
-static inline struct ir3_instruction * \
-ir3_##name(struct ir3_block *block, \
- struct ir3_instruction *a, unsigned aflags, \
- struct ir3_instruction *b, unsigned bflags) \
-{ \
- struct ir3_instruction *instr = \
- ir3_instr_create(block, opc, 1, 2); \
- __ssa_dst(instr); \
- __ssa_src(instr, a, aflags); \
- __ssa_src(instr, b, bflags); \
- instr->flags |= flag; \
- return instr; \
-}
-#define INSTR2F(f, name) __INSTR2(IR3_INSTR_##f, name##_##f, OPC_##name)
-#define INSTR2(name) __INSTR2(0, name, OPC_##name)
-
-#define __INSTR3(flag, dst_count, name, opc) \
-static inline struct ir3_instruction * \
-ir3_##name(struct ir3_block *block, \
- struct ir3_instruction *a, unsigned aflags, \
- struct ir3_instruction *b, unsigned bflags, \
- struct ir3_instruction *c, unsigned cflags) \
-{ \
- struct ir3_instruction *instr = \
- ir3_instr_create(block, opc, dst_count, 3); \
- for (unsigned i = 0; i < dst_count; i++) \
- __ssa_dst(instr); \
- __ssa_src(instr, a, aflags); \
- __ssa_src(instr, b, bflags); \
- __ssa_src(instr, c, cflags); \
- instr->flags |= flag; \
- return instr; \
-}
-#define INSTR3F(f, name) __INSTR3(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
-#define INSTR3(name) __INSTR3(0, 1, name, OPC_##name)
-#define INSTR3NODST(name) __INSTR3(0, 0, name, OPC_##name)
-
-#define __INSTR4(flag, dst_count, name, opc) \
-static inline struct ir3_instruction * \
-ir3_##name(struct ir3_block *block, \
- struct ir3_instruction *a, unsigned aflags, \
- struct ir3_instruction *b, unsigned bflags, \
- struct ir3_instruction *c, unsigned cflags, \
- struct ir3_instruction *d, unsigned dflags) \
-{ \
- struct ir3_instruction *instr = \
- ir3_instr_create(block, opc, dst_count, 4); \
- for (unsigned i = 0; i < dst_count; i++) \
- __ssa_dst(instr); \
- __ssa_src(instr, a, aflags); \
- __ssa_src(instr, b, bflags); \
- __ssa_src(instr, c, cflags); \
- __ssa_src(instr, d, dflags); \
- instr->flags |= flag; \
- return instr; \
-}
-#define INSTR4F(f, name) __INSTR4(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
-#define INSTR4(name) __INSTR4(0, 1, name, OPC_##name)
-#define INSTR4NODST(name) __INSTR4(0, 0, name, OPC_##name)
-
-#define __INSTR5(flag, name, opc) \
-static inline struct ir3_instruction * \
-ir3_##name(struct ir3_block *block, \
- struct ir3_instruction *a, unsigned aflags, \
- struct ir3_instruction *b, unsigned bflags, \
- struct ir3_instruction *c, unsigned cflags, \
- struct ir3_instruction *d, unsigned dflags, \
- struct ir3_instruction *e, unsigned eflags) \
-{ \
- struct ir3_instruction *instr = \
- ir3_instr_create(block, opc, 1, 5); \
- __ssa_dst(instr); \
- __ssa_src(instr, a, aflags); \
- __ssa_src(instr, b, bflags); \
- __ssa_src(instr, c, cflags); \
- __ssa_src(instr, d, dflags); \
- __ssa_src(instr, e, eflags); \
- instr->flags |= flag; \
- return instr; \
-}
-#define INSTR5F(f, name) __INSTR5(IR3_INSTR_##f, name##_##f, OPC_##name)
-#define INSTR5(name) __INSTR5(0, name, OPC_##name)
-
-#define __INSTR6(flag, dst_count, name, opc) \
-static inline struct ir3_instruction * \
-ir3_##name(struct ir3_block *block, \
- struct ir3_instruction *a, unsigned aflags, \
- struct ir3_instruction *b, unsigned bflags, \
- struct ir3_instruction *c, unsigned cflags, \
- struct ir3_instruction *d, unsigned dflags, \
- struct ir3_instruction *e, unsigned eflags, \
- struct ir3_instruction *f, unsigned fflags) \
-{ \
- struct ir3_instruction *instr = \
- ir3_instr_create(block, opc, 1, 6); \
- for (unsigned i = 0; i < dst_count; i++) \
- __ssa_dst(instr); \
- __ssa_src(instr, a, aflags); \
- __ssa_src(instr, b, bflags); \
- __ssa_src(instr, c, cflags); \
- __ssa_src(instr, d, dflags); \
- __ssa_src(instr, e, eflags); \
- __ssa_src(instr, f, fflags); \
- instr->flags |= flag; \
- return instr; \
-}
-#define INSTR6F(f, name) __INSTR6(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
-#define INSTR6(name) __INSTR6(0, 1, name, OPC_##name)
-#define INSTR6NODST(name) __INSTR6(0, 0, name, OPC_##name)
/*
 * __INSTR0(flag, name, opc) defines a static inline builder
 * ir3_<name>(block) that takes no operands. The generated function creates
 * the instruction with ir3_instr_create(block, opc, 1, 0), ORs `flag` into
 * instr->flags and returns the instruction. It never calls __ssa_dst() or
 * __ssa_src().
 *
 * INSTR0F(f, name) -> ir3_<name>_<f>, flag IR3_INSTR_<f>
 * INSTR0(name)     -> ir3_<name>, no flag
 */
+/* clang-format off */
+#define __INSTR0(flag, name, opc) \
+static inline struct ir3_instruction *ir3_##name(struct ir3_block *block) \
+{ \
+ struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 0); \
+ instr->flags |= flag; \
+ return instr; \
+}
+/* clang-format on */
+#define INSTR0F(f, name) __INSTR0(IR3_INSTR_##f, name##_##f, OPC_##name)
+#define INSTR0(name) __INSTR0(0, name, OPC_##name)
+
/*
 * __INSTR1(flag, dst_count, name, opc) defines a static inline builder
 * ir3_<name>(block, a, aflags). The generated function creates the
 * instruction with ir3_instr_create(block, opc, dst_count, 1), calls
 * __ssa_dst() dst_count times, attaches `a` with `aflags` through
 * __ssa_src(), ORs `flag` into instr->flags and returns the instruction.
 *
 * INSTR1F(f, name)  -> ir3_<name>_<f>, flag IR3_INSTR_<f>, dst_count 1
 * INSTR1(name)      -> ir3_<name>, no flag, dst_count 1
 * INSTR1NODST(name) -> ir3_<name>, no flag, dst_count 0
 */
+/* clang-format off */
+#define __INSTR1(flag, dst_count, name, opc) \
+static inline struct ir3_instruction *ir3_##name( \
+ struct ir3_block *block, struct ir3_instruction *a, unsigned aflags) \
+{ \
+ struct ir3_instruction *instr = \
+ ir3_instr_create(block, opc, dst_count, 1); \
+ for (unsigned i = 0; i < dst_count; i++) \
+ __ssa_dst(instr); \
+ __ssa_src(instr, a, aflags); \
+ instr->flags |= flag; \
+ return instr; \
+}
+/* clang-format on */
+#define INSTR1F(f, name) __INSTR1(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
+#define INSTR1(name) __INSTR1(0, 1, name, OPC_##name)
+#define INSTR1NODST(name) __INSTR1(0, 0, name, OPC_##name)
+
/*
 * __INSTR2(flag, name, opc) defines a static inline builder
 * ir3_<name>(block, a, aflags, b, bflags). The generated function creates
 * the instruction with ir3_instr_create(block, opc, 1, 2), calls
 * __ssa_dst() once, attaches `a` then `b` (each with its flags) through
 * __ssa_src(), ORs `flag` into instr->flags and returns the instruction.
 *
 * INSTR2F(f, name) -> ir3_<name>_<f>, flag IR3_INSTR_<f>
 * INSTR2(name)     -> ir3_<name>, no flag
 */
+/* clang-format off */
+#define __INSTR2(flag, name, opc) \
+static inline struct ir3_instruction *ir3_##name( \
+ struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \
+ struct ir3_instruction *b, unsigned bflags) \
+{ \
+ struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 2); \
+ __ssa_dst(instr); \
+ __ssa_src(instr, a, aflags); \
+ __ssa_src(instr, b, bflags); \
+ instr->flags |= flag; \
+ return instr; \
+}
+/* clang-format on */
+#define INSTR2F(f, name) __INSTR2(IR3_INSTR_##f, name##_##f, OPC_##name)
+#define INSTR2(name) __INSTR2(0, name, OPC_##name)
+
/*
 * __INSTR3(flag, dst_count, name, opc) defines a static inline builder
 * ir3_<name>(block, a, aflags, b, bflags, c, cflags). The generated
 * function creates the instruction with
 * ir3_instr_create(block, opc, dst_count, 3), calls __ssa_dst() dst_count
 * times, attaches sources a, b, c in that order (each with its flags)
 * through __ssa_src(), ORs `flag` into instr->flags and returns the
 * instruction.
 *
 * INSTR3F(f, name)  -> ir3_<name>_<f>, flag IR3_INSTR_<f>, dst_count 1
 * INSTR3(name)      -> ir3_<name>, no flag, dst_count 1
 * INSTR3NODST(name) -> ir3_<name>, no flag, dst_count 0
 */
+/* clang-format off */
+#define __INSTR3(flag, dst_count, name, opc) \
+static inline struct ir3_instruction *ir3_##name( \
+ struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \
+ struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \
+ unsigned cflags) \
+{ \
+ struct ir3_instruction *instr = \
+ ir3_instr_create(block, opc, dst_count, 3); \
+ for (unsigned i = 0; i < dst_count; i++) \
+ __ssa_dst(instr); \
+ __ssa_src(instr, a, aflags); \
+ __ssa_src(instr, b, bflags); \
+ __ssa_src(instr, c, cflags); \
+ instr->flags |= flag; \
+ return instr; \
+}
+/* clang-format on */
+#define INSTR3F(f, name) __INSTR3(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
+#define INSTR3(name) __INSTR3(0, 1, name, OPC_##name)
+#define INSTR3NODST(name) __INSTR3(0, 0, name, OPC_##name)
+
/*
 * __INSTR4(flag, dst_count, name, opc) defines a static inline builder
 * ir3_<name>(block, a, aflags, b, bflags, c, cflags, d, dflags). The
 * generated function creates the instruction with
 * ir3_instr_create(block, opc, dst_count, 4), calls __ssa_dst() dst_count
 * times, attaches sources a..d in order (each with its flags) through
 * __ssa_src(), ORs `flag` into instr->flags and returns the instruction.
 *
 * INSTR4F(f, name)  -> ir3_<name>_<f>, flag IR3_INSTR_<f>, dst_count 1
 * INSTR4(name)      -> ir3_<name>, no flag, dst_count 1
 * INSTR4NODST(name) -> ir3_<name>, no flag, dst_count 0
 */
+/* clang-format off */
+#define __INSTR4(flag, dst_count, name, opc) \
+static inline struct ir3_instruction *ir3_##name( \
+ struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \
+ struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \
+ unsigned cflags, struct ir3_instruction *d, unsigned dflags) \
+{ \
+ struct ir3_instruction *instr = \
+ ir3_instr_create(block, opc, dst_count, 4); \
+ for (unsigned i = 0; i < dst_count; i++) \
+ __ssa_dst(instr); \
+ __ssa_src(instr, a, aflags); \
+ __ssa_src(instr, b, bflags); \
+ __ssa_src(instr, c, cflags); \
+ __ssa_src(instr, d, dflags); \
+ instr->flags |= flag; \
+ return instr; \
+}
+/* clang-format on */
+#define INSTR4F(f, name) __INSTR4(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
+#define INSTR4(name) __INSTR4(0, 1, name, OPC_##name)
+#define INSTR4NODST(name) __INSTR4(0, 0, name, OPC_##name)
+
/*
 * __INSTR5(flag, name, opc) defines a static inline builder
 * ir3_<name>(block, a, aflags, ..., e, eflags) taking five sources. The
 * generated function creates the instruction with
 * ir3_instr_create(block, opc, 1, 5), calls __ssa_dst() once, attaches
 * sources a..e in order (each with its flags) through __ssa_src(), ORs
 * `flag` into instr->flags and returns the instruction.
 *
 * INSTR5F(f, name) -> ir3_<name>_<f>, flag IR3_INSTR_<f>
 * INSTR5(name)     -> ir3_<name>, no flag
 */
+/* clang-format off */
+#define __INSTR5(flag, name, opc) \
+static inline struct ir3_instruction *ir3_##name( \
+ struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \
+ struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \
+ unsigned cflags, struct ir3_instruction *d, unsigned dflags, \
+ struct ir3_instruction *e, unsigned eflags) \
+{ \
+ struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 5); \
+ __ssa_dst(instr); \
+ __ssa_src(instr, a, aflags); \
+ __ssa_src(instr, b, bflags); \
+ __ssa_src(instr, c, cflags); \
+ __ssa_src(instr, d, dflags); \
+ __ssa_src(instr, e, eflags); \
+ instr->flags |= flag; \
+ return instr; \
+}
+/* clang-format on */
+#define INSTR5F(f, name) __INSTR5(IR3_INSTR_##f, name##_##f, OPC_##name)
+#define INSTR5(name) __INSTR5(0, name, OPC_##name)
+
/*
 * __INSTR6(flag, dst_count, name, opc) defines a static inline builder
 * ir3_<name>(block, a, aflags, ..., f, fflags) taking six sources. The
 * generated function creates the instruction with
 * ir3_instr_create(block, opc, dst_count, 6), calls __ssa_dst() dst_count
 * times, attaches sources a..f in order (each with its flags) through
 * __ssa_src(), ORs `flag` into instr->flags and returns the instruction.
 *
 * dst_count is forwarded to ir3_instr_create() exactly as __INSTR1,
 * __INSTR3 and __INSTR4 do, so the dst count requested at creation always
 * matches the number of __ssa_dst() calls (including the NODST variant).
 *
 * INSTR6F(f, name)  -> ir3_<name>_<f>, flag IR3_INSTR_<f>, dst_count 1
 * INSTR6(name)      -> ir3_<name>, no flag, dst_count 1
 * INSTR6NODST(name) -> ir3_<name>, no flag, dst_count 0
 */
+/* clang-format off */
+#define __INSTR6(flag, dst_count, name, opc) \
+static inline struct ir3_instruction *ir3_##name( \
+ struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \
+ struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \
+ unsigned cflags, struct ir3_instruction *d, unsigned dflags, \
+ struct ir3_instruction *e, unsigned eflags, struct ir3_instruction *f, \
+ unsigned fflags) \
+{ \
+ struct ir3_instruction *instr = \
+ ir3_instr_create(block, opc, dst_count, 6); \
+ for (unsigned i = 0; i < dst_count; i++) \
+ __ssa_dst(instr); \
+ __ssa_src(instr, a, aflags); \
+ __ssa_src(instr, b, bflags); \
+ __ssa_src(instr, c, cflags); \
+ __ssa_src(instr, d, dflags); \
+ __ssa_src(instr, e, eflags); \
+ __ssa_src(instr, f, fflags); \
+ instr->flags |= flag; \
+ return instr; \
+}
+/* clang-format on */
+#define INSTR6F(f, name) __INSTR6(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
+#define INSTR6(name) __INSTR6(0, 1, name, OPC_##name)
+#define INSTR6NODST(name) __INSTR6(0, 0, name, OPC_##name)
/* cat0 instructions: */
/* INSTR1NODST(B) expands through __INSTR1(0, 0, B, OPC_B) to
 * ir3_B(block, a, aflags): one source, dst_count 0, no flag.
 */
INSTR1NODST(B)
#include "ir3_compiler.h"
/* Name / flag / help-text table for the "IR3_SHADER_DEBUG" option (consumed
 * by the DEBUG_GET_ONCE_FLAGS_OPTION() use that follows). Each entry pairs an
 * option name with an IR3_DBG_* value; the last two entries are only present
 * when DEBUG is defined. The list ends with DEBUG_NAMED_VALUE_END.
 */
static const struct debug_named_value shader_debug_options[] = {
- {"vs", IR3_DBG_SHADER_VS, "Print shader disasm for vertex shaders"},
- {"tcs", IR3_DBG_SHADER_TCS, "Print shader disasm for tess ctrl shaders"},
- {"tes", IR3_DBG_SHADER_TES, "Print shader disasm for tess eval shaders"},
- {"gs", IR3_DBG_SHADER_GS, "Print shader disasm for geometry shaders"},
- {"fs", IR3_DBG_SHADER_FS, "Print shader disasm for fragment shaders"},
- {"cs", IR3_DBG_SHADER_CS, "Print shader disasm for compute shaders"},
- {"disasm", IR3_DBG_DISASM, "Dump NIR and adreno shader disassembly"},
- {"optmsgs", IR3_DBG_OPTMSGS, "Enable optimizer debug messages"},
- {"forces2en", IR3_DBG_FORCES2EN, "Force s2en mode for tex sampler instructions"},
- {"nouboopt", IR3_DBG_NOUBOOPT, "Disable lowering UBO to uniform"},
- {"nofp16", IR3_DBG_NOFP16, "Don't lower mediump to fp16"},
- {"nocache", IR3_DBG_NOCACHE, "Disable shader cache"},
+ /* clang-format off */
+ {"vs", IR3_DBG_SHADER_VS, "Print shader disasm for vertex shaders"},
+ {"tcs", IR3_DBG_SHADER_TCS, "Print shader disasm for tess ctrl shaders"},
+ {"tes", IR3_DBG_SHADER_TES, "Print shader disasm for tess eval shaders"},
+ {"gs", IR3_DBG_SHADER_GS, "Print shader disasm for geometry shaders"},
+ {"fs", IR3_DBG_SHADER_FS, "Print shader disasm for fragment shaders"},
+ {"cs", IR3_DBG_SHADER_CS, "Print shader disasm for compute shaders"},
+ {"disasm", IR3_DBG_DISASM, "Dump NIR and adreno shader disassembly"},
+ {"optmsgs", IR3_DBG_OPTMSGS, "Enable optimizer debug messages"},
+ {"forces2en", IR3_DBG_FORCES2EN, "Force s2en mode for tex sampler instructions"},
+ {"nouboopt", IR3_DBG_NOUBOOPT, "Disable lowering UBO to uniform"},
+ {"nofp16", IR3_DBG_NOFP16, "Don't lower mediump to fp16"},
+ {"nocache", IR3_DBG_NOCACHE, "Disable shader cache"},
#ifdef DEBUG
- /* DEBUG-only options: */
- {"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"},
- {"ramsgs", IR3_DBG_RAMSGS, "Enable register-allocation debug messages"},
+ /* DEBUG-only options: */
+ {"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"},
+ {"ramsgs", IR3_DBG_RAMSGS, "Enable register-allocation debug messages"},
#endif
- DEBUG_NAMED_VALUE_END
+ DEBUG_NAMED_VALUE_END
+ /* clang-format on */
};
DEBUG_GET_ONCE_FLAGS_OPTION(ir3_shader_debug, "IR3_SHADER_DEBUG", shader_debug_options, 0)
*
* No guarantees are made as to whether a given key/value pair is present in
* the cache after the set call. If a different value has been associated
- * with the given key in the past then it is undefined which value, if any, is
- * associated with the key after the set call. Note that while there are no
- * guarantees, the cache implementation should attempt to cache the most
+ * with the given key in the past then it is undefined which value, if any,
+ * is associated with the key after the set call. Note that while there are
+ * no guarantees, the cache implementation should attempt to cache the most
* recently set value for a given key.
*
* for this reason, because binning pass variants share const_state with
#define IBO_SSBO 0x80
uint8_t tex_to_image[32];
- uint8_t num_tex; /* including real textures */
- uint8_t tex_base; /* the number of real textures, ie. image/ssbo start here */
+ /* including real textures */
+ uint8_t num_tex;
+ /* the number of real textures, ie. image/ssbo start here */
+ uint8_t tex_base;
};
struct ir3_disasm_info {
* instruction are not counted.
*/
+/* clang-format off */
/* TEST(n, ...): builds one brace initializer whose first member is the
 * variadic arguments turned into a string literal by the # operator and
 * whose second member is n. The stringified text is whatever tokens follow
 * n, so the assembly in each test is written unquoted.
 */
#define TEST(n, ...) { # __VA_ARGS__, n }
+/* clang-format on */
/* Table of test cases built with TEST(): asmstr holds the stringified
 * assembly snippet and expected_delay holds the number given as the first
 * TEST() argument (presumably the delay expected before the final
 * instruction of the snippet - confirm against the code that walks this
 * table).
 */
static const struct test {
const char *asmstr;
unsigned expected_delay;
} tests[] = {
- TEST(6,
- add.f r0.x, r2.x, r2.y
- rsq r0.x, r0.x
- ),
- TEST(3,
- mov.f32f32 r0.x, c0.x
- mov.f32f32 r0.y, c0.y
- add.f r0.x, r0.x, r0.y
- ),
- TEST(2,
- mov.f32f32 r0.x, c0.x
- mov.f32f32 r0.y, c0.y
- mov.f32f32 r0.z, c0.z
- mad.f32 r0.x, r0.x, r0.y, r0.z
- ),
- TEST(2,
- mov.f32f32 r0.x, c0.x
- mov.f32f32 r0.y, c0.y
- (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
- ),
- TEST(2,
- (rpt1)mov.f32f32 r0.x, c0.x
- (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
- ),
- TEST(3,
- mov.f32f32 r0.y, c0.y
- mov.f32f32 r0.x, c0.x
- (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
- ),
- TEST(1,
- (rpt2)mov.f32f32 r0.x, (r)c0.x
- add.f r0.x, r0.x, c0.x
- ),
- TEST(2,
- (rpt2)mov.f32f32 r0.x, (r)c0.x
- add.f r0.x, r0.x, r0.y
- ),
- TEST(2,
- (rpt1)mov.f32f32 r0.x, (r)c0.x
- (rpt1)add.f r0.x, (r)r0.x, c0.x
- ),
- TEST(1,
- (rpt1)mov.f32f32 r0.y, (r)c0.x
- (rpt1)add.f r0.x, (r)r0.x, c0.x
- ),
- TEST(3,
- (rpt1)mov.f32f32 r0.x, (r)c0.x
- (rpt1)add.f r0.x, (r)r0.y, c0.x
- ),
+ /* clang-format off */
+ TEST(6,
+ add.f r0.x, r2.x, r2.y
+ rsq r0.x, r0.x
+ ),
+ TEST(3,
+ mov.f32f32 r0.x, c0.x
+ mov.f32f32 r0.y, c0.y
+ add.f r0.x, r0.x, r0.y
+ ),
+ TEST(2,
+ mov.f32f32 r0.x, c0.x
+ mov.f32f32 r0.y, c0.y
+ mov.f32f32 r0.z, c0.z
+ mad.f32 r0.x, r0.x, r0.y, r0.z
+ ),
+ TEST(2,
+ mov.f32f32 r0.x, c0.x
+ mov.f32f32 r0.y, c0.y
+ (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
+ ),
+ TEST(2,
+ (rpt1)mov.f32f32 r0.x, c0.x
+ (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
+ ),
+ TEST(3,
+ mov.f32f32 r0.y, c0.y
+ mov.f32f32 r0.x, c0.x
+ (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
+ ),
+ TEST(1,
+ (rpt2)mov.f32f32 r0.x, (r)c0.x
+ add.f r0.x, r0.x, c0.x
+ ),
+ TEST(2,
+ (rpt2)mov.f32f32 r0.x, (r)c0.x
+ add.f r0.x, r0.x, r0.y
+ ),
+ TEST(2,
+ (rpt1)mov.f32f32 r0.x, (r)c0.x
+ (rpt1)add.f r0.x, (r)r0.x, c0.x
+ ),
+ TEST(1,
+ (rpt1)mov.f32f32 r0.y, (r)c0.x
+ (rpt1)add.f r0.x, (r)r0.x, c0.x
+ ),
+ TEST(3,
+ (rpt1)mov.f32f32 r0.x, (r)c0.x
+ (rpt1)add.f r0.x, (r)r0.y, c0.x
+ ),
+ /* clang-format on */
};
static struct ir3_shader *
#include "isa/isa.h"
+/* clang-format off */
/* INSTR_5XX / INSTR_6XX build one designated-initializer test entry:
 * .gpu_id is fixed to 540 or 630 respectively, .instr is the first argument
 * turned into a string literal by the # operator, .expected is d, and any
 * further arguments are appended verbatim as extra initializers
 * (e.g. .parse_fail=true).
 */
#define INSTR_5XX(i, d, ...) { .gpu_id = 540, .instr = #i, .expected = d, __VA_ARGS__ }
#define INSTR_6XX(i, d, ...) { .gpu_id = 630, .instr = #i, .expected = d, __VA_ARGS__ }
+/* clang-format on */
static const struct test {
int gpu_id;
*/
bool parse_fail;
} tests[] = {
+/* clang-format off */
/* cat0 */
INSTR_6XX(00000000_00000000, "nop"),
INSTR_6XX(00000200_00000000, "(rpt2)nop"),
/* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */
INSTR_6XX(c0260200_03676100, "stib.b.untyped.1d.u32.3.imm.base0 r0.x, r0.w, 1"), /* stib.untyped.u32.1d.3.mode4.base0 r0.x, r0.w, 1 */
#if 0
- /* TODO blob sometimes/frequently sets b0, although there does not seem
- * to be an obvious pattern and our encoding never sets it. AFAICT it
- * is a dontcare bit
- */
- /* dEQP-VK.texture.filtering.cube.formats.a8b8g8r8_srgb_nearest_mipmap_nearest.txt */
- INSTR_6XX(c0220200_0361b801, "ldib.b.typed.1d.f32.4.imm r0.x, r0.w, 1"), /* ldib.f32.1d.4.mode0.base0 r0.x, r0.w, 1 */
+ /* TODO blob sometimes/frequently sets b0, although there does not seem
+ * to be an obvious pattern and our encoding never sets it. AFAICT it
+ * is a dontcare bit
+ */
+ /* dEQP-VK.texture.filtering.cube.formats.a8b8g8r8_srgb_nearest_mipmap_nearest.txt */
+ INSTR_6XX(c0220200_0361b801, "ldib.b.typed.1d.f32.4.imm r0.x, r0.w, 1"), /* ldib.f32.1d.4.mode0.base0 r0.x, r0.w, 1 */
#else
- /* dEQP-VK.texture.filtering.cube.formats.a8b8g8r8_srgb_nearest_mipmap_nearest.txt */
- INSTR_6XX(c0220200_0361b800, "ldib.b.typed.1d.f32.4.imm r0.x, r0.w, 1"), /* ldib.f32.1d.4.mode0.base0 r0.x, r0.w, 1 */
+ /* dEQP-VK.texture.filtering.cube.formats.a8b8g8r8_srgb_nearest_mipmap_nearest.txt */
+ INSTR_6XX(c0220200_0361b800, "ldib.b.typed.1d.f32.4.imm r0.x, r0.w, 1"), /* ldib.f32.1d.4.mode0.base0 r0.x, r0.w, 1 */
#endif
- /* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */
- INSTR_6XX(c2c21100_04800006, "stlw.f32 l[r2.x], r0.w, 4"),
- INSTR_6XX(c2c20f00_01800004, "stlw.f32 l[r1.w], r0.z, 1"),
- INSTR_6XX(c2860003_02808011, "ldlw.u32 r0.w, l[r0.z+8], 2"),
-
- /* dEQP-VK.compute.basic.shared_var_single_group */
- INSTR_6XX(c1060500_01800008, "stl.u32 l[r0.z], r1.x, 1"),
- INSTR_6XX(c0460001_01804001, "ldl.u32 r0.y, l[r0.y], 1"),
-
- INSTR_6XX(c0860018_03820001, "ldp.u32 r6.x, p[r2.x], 3"),
- INSTR_6XX(c0420002_01808019, "ldl.f32 r0.z, l[r0.z+12], 1"),
- INSTR_6XX(c1021710_04800000, "stl.f32 l[r2.w+16], r0.x, 4"),
- INSTR_6XX(d7c60011_03c00000, "(sy)ldlv.u32 r4.y, l[0], 3"),
-
- /* resinfo */
- INSTR_6XX(c0260000_0063c200, "resinfo.b.untyped.2d.u32.1.imm r0.x, 0"), /* resinfo.u32.2d.mode0.base0 r0.x, 0 */
- /* dEQP-GLES31.functional.image_load_store.buffer.image_size.writeonly_7.txt */
- INSTR_6XX(c0260000_0063c000, "resinfo.b.untyped.1d.u32.1.imm r0.x, 0"), /* resinfo.u32.1d.mode0.base0 r0.x, 0 */
- /* dEQP-VK.image.image_size.2d.readonly_12x34.txt */
- INSTR_6XX(c0260000_0063c300, "resinfo.b.untyped.2d.u32.1.imm.base0 r0.x, 0"), /* resinfo.u32.2d.mode4.base0 r0.x, 0 */
-
- /* dEQP-GLES31.functional.image_load_store.2d.image_size.readonly_writeonly_32x32.txt */
- INSTR_5XX(c3e60000_00000200, "resinfo.u32.2d r0.x, g[0]"), /* resinfo.u32.2d r0.x, 0 */
+ /* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */
+ INSTR_6XX(c2c21100_04800006, "stlw.f32 l[r2.x], r0.w, 4"),
+ INSTR_6XX(c2c20f00_01800004, "stlw.f32 l[r1.w], r0.z, 1"),
+ INSTR_6XX(c2860003_02808011, "ldlw.u32 r0.w, l[r0.z+8], 2"),
+
+ /* dEQP-VK.compute.basic.shared_var_single_group */
+ INSTR_6XX(c1060500_01800008, "stl.u32 l[r0.z], r1.x, 1"),
+ INSTR_6XX(c0460001_01804001, "ldl.u32 r0.y, l[r0.y], 1"),
+
+ INSTR_6XX(c0860018_03820001, "ldp.u32 r6.x, p[r2.x], 3"),
+ INSTR_6XX(c0420002_01808019, "ldl.f32 r0.z, l[r0.z+12], 1"),
+ INSTR_6XX(c1021710_04800000, "stl.f32 l[r2.w+16], r0.x, 4"),
+ INSTR_6XX(d7c60011_03c00000, "(sy)ldlv.u32 r4.y, l[0], 3"),
+
+ /* resinfo */
+ INSTR_6XX(c0260000_0063c200, "resinfo.b.untyped.2d.u32.1.imm r0.x, 0"), /* resinfo.u32.2d.mode0.base0 r0.x, 0 */
+ /* dEQP-GLES31.functional.image_load_store.buffer.image_size.writeonly_7.txt */
+ INSTR_6XX(c0260000_0063c000, "resinfo.b.untyped.1d.u32.1.imm r0.x, 0"), /* resinfo.u32.1d.mode0.base0 r0.x, 0 */
+ /* dEQP-VK.image.image_size.2d.readonly_12x34.txt */
+ INSTR_6XX(c0260000_0063c300, "resinfo.b.untyped.2d.u32.1.imm.base0 r0.x, 0"), /* resinfo.u32.2d.mode4.base0 r0.x, 0 */
+
+ /* dEQP-GLES31.functional.image_load_store.2d.image_size.readonly_writeonly_32x32.txt */
+ INSTR_5XX(c3e60000_00000200, "resinfo.u32.2d r0.x, g[0]"), /* resinfo.u32.2d r0.x, 0 */
#if 0
- /* TODO our encoding differs in b11 ('typed'), which seems to be a dontcare bit */
- /* dEQP-GLES31.functional.image_load_store.buffer.image_size.readonly_writeonly_7 */
- INSTR_5XX(c3e60000_00000e00, "resinfo.u32.4d r0.x, g[0]"), /* resinfo.u32.1dtype r0.x, 0 */
- /* dEQP-GLES31.functional.image_load_store.3d.image_size.readonly_writeonly_12x34x56 */
- INSTR_5XX(c3e60000_00000c00, "resinfo.u32.3d r0.x, g[0]"), /* resinfo.u32.3d r0.x, 0 */
+ /* TODO our encoding differs in b11 ('typed'), which seems to be a dontcare bit */
+ /* dEQP-GLES31.functional.image_load_store.buffer.image_size.readonly_writeonly_7 */
+ INSTR_5XX(c3e60000_00000e00, "resinfo.u32.4d r0.x, g[0]"), /* resinfo.u32.1dtype r0.x, 0 */
+ /* dEQP-GLES31.functional.image_load_store.3d.image_size.readonly_writeonly_12x34x56 */
+ INSTR_5XX(c3e60000_00000c00, "resinfo.u32.3d r0.x, g[0]"), /* resinfo.u32.3d r0.x, 0 */
#else
- /* dEQP-GLES31.functional.image_load_store.buffer.image_size.readonly_writeonly_7 */
- INSTR_5XX(c3e60000_00000600, "resinfo.u32.4d r0.x, g[0]"), /* resinfo.u32.1dtype r0.x, 0 */
- /* dEQP-GLES31.functional.image_load_store.2d.image_size.readonly_writeonly_32x32.txt */
- INSTR_5XX(c3e60000_00000400, "resinfo.u32.3d r0.x, g[0]"), /* resinfo.u32.3d r0.x, 0 */
+ /* dEQP-GLES31.functional.image_load_store.buffer.image_size.readonly_writeonly_7 */
+ INSTR_5XX(c3e60000_00000600, "resinfo.u32.4d r0.x, g[0]"), /* resinfo.u32.1dtype r0.x, 0 */
+ /* dEQP-GLES31.functional.image_load_store.2d.image_size.readonly_writeonly_32x32.txt */
+ INSTR_5XX(c3e60000_00000400, "resinfo.u32.3d r0.x, g[0]"), /* resinfo.u32.3d r0.x, 0 */
#endif
- /* ldgb */
- /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_vec4 */
- INSTR_5XX(c6e20000_06003600, "ldgb.untyped.4d.f32.4 r0.x, g[0], r0.x, r1.z"), /* ldgb.a.untyped.1dtype.f32.4 r0.x, g[r0.x], r1.z, 0 */
- /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_ivec4 */
- INSTR_5XX(c6ea0000_06003600, "ldgb.untyped.4d.s32.4 r0.x, g[0], r0.x, r1.z"), /* ldgb.a.untyped.1dtype.s32.4 r0.x, g[r0.x], r1.z, 0 */
- /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_float */
- INSTR_5XX(c6e20000_02000600, "ldgb.untyped.4d.f32.1 r0.x, g[0], r0.x, r0.z"), /* ldgb.a.untyped.1dtype.f32.1 r0.x, g[r0.x], r0.z, 0 */
- /* dEQP-GLES31.functional.ssbo.layout.random.vector_types.0 */
- INSTR_5XX(c6ea0008_14002600, "ldgb.untyped.4d.s32.3 r2.x, g[0], r0.x, r5.x"), /* ldgb.a.untyped.1dtype.s32.3 r2.x, g[r0.x], r5.x, 0 */
- INSTR_5XX(c6ea0204_1401a600, "ldgb.untyped.4d.s32.3 r1.x, g[1], r1.z, r5.x"), /* ldgb.a.untyped.1dtype.s32.3 r1.x, g[r1.z], r5.x, 1 */
-
- /* stgb */
- INSTR_5XX(c7220028_0480000d, "stgb.untyped.1d.f32.1 g[0], r1.z, 4, r10.x"), /* stgb.untyped.1d.1 g[r10.x], r1.z, 4, r0.x */
- INSTR_5XX(c7260023_02800009, "stgb.untyped.1d.u32.1 g[0], r1.x, 2, r8.w"), /* stgb.untyped.1d.1 g[r8.w], r1.x, 2, r0.x */
-
- /* discard stuff */
- INSTR_6XX(42b400f8_20010004, "cmps.s.eq p0.x, r1.x, 1"),
- INSTR_6XX(02800000_00000000, "kill p0.x"),
-
- /* Immediates */
- INSTR_6XX(40100007_68000008, "add.f r1.w, r2.x, (neg)(0.0)"),
- INSTR_6XX(40100007_68010008, "add.f r1.w, r2.x, (neg)(0.5)"),
- INSTR_6XX(40100007_68020008, "add.f r1.w, r2.x, (neg)(1.0)"),
- INSTR_6XX(40100007_68030008, "add.f r1.w, r2.x, (neg)(2.0)"),
- INSTR_6XX(40100007_68040008, "add.f r1.w, r2.x, (neg)(e)"),
- INSTR_6XX(40100007_68050008, "add.f r1.w, r2.x, (neg)(pi)"),
- INSTR_6XX(40100007_68060008, "add.f r1.w, r2.x, (neg)(1/pi)"),
- INSTR_6XX(40100007_68070008, "add.f r1.w, r2.x, (neg)(1/log2(e))"),
- INSTR_6XX(40100007_68080008, "add.f r1.w, r2.x, (neg)(log2(e))"),
- INSTR_6XX(40100007_68090008, "add.f r1.w, r2.x, (neg)(1/log2(10))"),
- INSTR_6XX(40100007_680a0008, "add.f r1.w, r2.x, (neg)(log2(10))"),
- INSTR_6XX(40100007_680b0008, "add.f r1.w, r2.x, (neg)(4.0)"),
-
- /* LDC. Our disasm differs greatly from qcom here, and we've got some
- * important info they lack(?!), but same goes the other way.
- */
+ /* ldgb */
+ /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_vec4 */
+ INSTR_5XX(c6e20000_06003600, "ldgb.untyped.4d.f32.4 r0.x, g[0], r0.x, r1.z"), /* ldgb.a.untyped.1dtype.f32.4 r0.x, g[r0.x], r1.z, 0 */
+ /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_ivec4 */
+ INSTR_5XX(c6ea0000_06003600, "ldgb.untyped.4d.s32.4 r0.x, g[0], r0.x, r1.z"), /* ldgb.a.untyped.1dtype.s32.4 r0.x, g[r0.x], r1.z, 0 */
+ /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_float */
+ INSTR_5XX(c6e20000_02000600, "ldgb.untyped.4d.f32.1 r0.x, g[0], r0.x, r0.z"), /* ldgb.a.untyped.1dtype.f32.1 r0.x, g[r0.x], r0.z, 0 */
+ /* dEQP-GLES31.functional.ssbo.layout.random.vector_types.0 */
+ INSTR_5XX(c6ea0008_14002600, "ldgb.untyped.4d.s32.3 r2.x, g[0], r0.x, r5.x"), /* ldgb.a.untyped.1dtype.s32.3 r2.x, g[r0.x], r5.x, 0 */
+ INSTR_5XX(c6ea0204_1401a600, "ldgb.untyped.4d.s32.3 r1.x, g[1], r1.z, r5.x"), /* ldgb.a.untyped.1dtype.s32.3 r1.x, g[r1.z], r5.x, 1 */
+
+ /* stgb */
+ INSTR_5XX(c7220028_0480000d, "stgb.untyped.1d.f32.1 g[0], r1.z, 4, r10.x"), /* stgb.untyped.1d.1 g[r10.x], r1.z, 4, r0.x */
+ INSTR_5XX(c7260023_02800009, "stgb.untyped.1d.u32.1 g[0], r1.x, 2, r8.w"), /* stgb.untyped.1d.1 g[r8.w], r1.x, 2, r0.x */
+
+ /* discard stuff */
+ INSTR_6XX(42b400f8_20010004, "cmps.s.eq p0.x, r1.x, 1"),
+ INSTR_6XX(02800000_00000000, "kill p0.x"),
+
+ /* Immediates */
+ INSTR_6XX(40100007_68000008, "add.f r1.w, r2.x, (neg)(0.0)"),
+ INSTR_6XX(40100007_68010008, "add.f r1.w, r2.x, (neg)(0.5)"),
+ INSTR_6XX(40100007_68020008, "add.f r1.w, r2.x, (neg)(1.0)"),
+ INSTR_6XX(40100007_68030008, "add.f r1.w, r2.x, (neg)(2.0)"),
+ INSTR_6XX(40100007_68040008, "add.f r1.w, r2.x, (neg)(e)"),
+ INSTR_6XX(40100007_68050008, "add.f r1.w, r2.x, (neg)(pi)"),
+ INSTR_6XX(40100007_68060008, "add.f r1.w, r2.x, (neg)(1/pi)"),
+ INSTR_6XX(40100007_68070008, "add.f r1.w, r2.x, (neg)(1/log2(e))"),
+ INSTR_6XX(40100007_68080008, "add.f r1.w, r2.x, (neg)(log2(e))"),
+ INSTR_6XX(40100007_68090008, "add.f r1.w, r2.x, (neg)(1/log2(10))"),
+ INSTR_6XX(40100007_680a0008, "add.f r1.w, r2.x, (neg)(log2(10))"),
+ INSTR_6XX(40100007_680b0008, "add.f r1.w, r2.x, (neg)(4.0)"),
+
+ /* LDC. Our disasm differs greatly from qcom here, and we've got some
+ * important info they lack(?!), but same goes the other way.
+ */
#if 0
- /* TODO our encoding differs in b23 for these four.. unsure if that is dontcare bit */
- /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.uniform_fragment */
- INSTR_6XX(c0260000_00c78040, "ldc.offset0.1.uniform r0.x, r0.x, r0.x"), /* ldc.1.mode1.base0 r0.x, 0, r0.x */
- INSTR_6XX(c0260201_00c78040, "ldc.offset0.1.uniform r0.y, r0.x, r0.y"), /* ldc.1.mode1.base0 r0.y, 0, r0.y */
- /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.dynamically_uniform_fragment */
- INSTR_6XX(c0260000_00c78080, "ldc.offset0.1.nonuniform r0.x, r0.x, r0.x"), /* ldc.1.mode2.base0 r0.x, 0, r0.x */
- INSTR_6XX(c0260201_00c78080, "ldc.offset0.1.nonuniform r0.y, r0.x, r0.y"), /* ldc.1.mode2.base0 r0.y, 0, r0.y */
+ /* TODO our encoding differs in b23 for these four.. unsure if that is dontcare bit */
+ /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.uniform_fragment */
+ INSTR_6XX(c0260000_00c78040, "ldc.offset0.1.uniform r0.x, r0.x, r0.x"), /* ldc.1.mode1.base0 r0.x, 0, r0.x */
+ INSTR_6XX(c0260201_00c78040, "ldc.offset0.1.uniform r0.y, r0.x, r0.y"), /* ldc.1.mode1.base0 r0.y, 0, r0.y */
+ /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.dynamically_uniform_fragment */
+ INSTR_6XX(c0260000_00c78080, "ldc.offset0.1.nonuniform r0.x, r0.x, r0.x"), /* ldc.1.mode2.base0 r0.x, 0, r0.x */
+ INSTR_6XX(c0260201_00c78080, "ldc.offset0.1.nonuniform r0.y, r0.x, r0.y"), /* ldc.1.mode2.base0 r0.y, 0, r0.y */
#else
- /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.uniform_fragment */
- INSTR_6XX(c0260000_00478040, "ldc.offset0.1.uniform r0.x, r0.x, r0.x"), /* ldc.1.mode1.base0 r0.x, 0, r0.x */
- INSTR_6XX(c0260201_00478040, "ldc.offset0.1.uniform r0.y, r0.x, r0.y"), /* ldc.1.mode1.base0 r0.y, 0, r0.y */
- /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.dynamically_uniform_fragment */
- INSTR_6XX(c0260000_00478080, "ldc.offset0.1.nonuniform r0.x, r0.x, r0.x"), /* ldc.1.mode2.base0 r0.x, 0, r0.x */
- INSTR_6XX(c0260201_00478080, "ldc.offset0.1.nonuniform r0.y, r0.x, r0.y"), /* ldc.1.mode2.base0 r0.y, 0, r0.y */
+ /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.uniform_fragment */
+ INSTR_6XX(c0260000_00478040, "ldc.offset0.1.uniform r0.x, r0.x, r0.x"), /* ldc.1.mode1.base0 r0.x, 0, r0.x */
+ INSTR_6XX(c0260201_00478040, "ldc.offset0.1.uniform r0.y, r0.x, r0.y"), /* ldc.1.mode1.base0 r0.y, 0, r0.y */
+ /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.dynamically_uniform_fragment */
+ INSTR_6XX(c0260000_00478080, "ldc.offset0.1.nonuniform r0.x, r0.x, r0.x"), /* ldc.1.mode2.base0 r0.x, 0, r0.x */
+ INSTR_6XX(c0260201_00478080, "ldc.offset0.1.nonuniform r0.y, r0.x, r0.y"), /* ldc.1.mode2.base0 r0.y, 0, r0.y */
#endif
- /* custom shaders, loading .x, .y, .z, .w from an array of vec4 in block 0 */
- INSTR_6XX(c0260000_00478000, "ldc.offset0.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
- INSTR_6XX(c0260000_00478200, "ldc.offset1.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
- INSTR_6XX(c0260000_00478400, "ldc.offset2.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
- INSTR_6XX(c0260000_00478600, "ldc.offset3.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
-
- /* dEQP-VK.glsl.struct.local.nested_struct_array_dynamic_index_fragment */
- INSTR_6XX(c1425b50_01803e02, "stp.f32 p[r11.y-176], r0.y, 1"),
- INSTR_6XX(c1425b98_02803e14, "stp.f32 p[r11.y-104], r2.z, 2"),
- INSTR_6XX(c1465ba0_01803e2a, "stp.u32 p[r11.y-96], r5.y, 1"),
- INSTR_6XX(c0860008_01860001, "ldp.u32 r2.x, p[r6.x], 1"),
- /* Custom stp based on above to catch a disasm bug. */
- INSTR_6XX(c1465b00_0180022a, "stp.u32 p[r11.y+256], r5.y, 1"),
-
- /* Atomic: */
+ /* custom shaders, loading .x, .y, .z, .w from an array of vec4 in block 0 */
+ INSTR_6XX(c0260000_00478000, "ldc.offset0.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
+ INSTR_6XX(c0260000_00478200, "ldc.offset1.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
+ INSTR_6XX(c0260000_00478400, "ldc.offset2.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
+ INSTR_6XX(c0260000_00478600, "ldc.offset3.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
+
+ /* dEQP-VK.glsl.struct.local.nested_struct_array_dynamic_index_fragment */
+ INSTR_6XX(c1425b50_01803e02, "stp.f32 p[r11.y-176], r0.y, 1"),
+ INSTR_6XX(c1425b98_02803e14, "stp.f32 p[r11.y-104], r2.z, 2"),
+ INSTR_6XX(c1465ba0_01803e2a, "stp.u32 p[r11.y-96], r5.y, 1"),
+ INSTR_6XX(c0860008_01860001, "ldp.u32 r2.x, p[r6.x], 1"),
+ /* Custom stp based on above to catch a disasm bug. */
+ INSTR_6XX(c1465b00_0180022a, "stp.u32 p[r11.y+256], r5.y, 1"),
+
+ /* Atomic: */
#if 0
- /* TODO our encoding differs in b53 for these two */
- INSTR_5XX(c4d60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
- INSTR_5XX(c4160205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
+ /* TODO our encoding differs in b53 for these two */
+ INSTR_5XX(c4d60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
+ INSTR_5XX(c4160205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
#else
- INSTR_5XX(c4f60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
- INSTR_5XX(c4360205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
+ INSTR_5XX(c4f60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
+ INSTR_5XX(c4360205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
#endif
- INSTR_6XX(d5c60003_03008001, "(sy)atomic.max.untyped.1d.u32.1.l r0.w, l[r0.z], r0.w"),
-
- /* Bindless atomic: */
- INSTR_6XX(c03a0003_01640000, "atomic.b.add.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.add.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
- INSTR_6XX(c03a0003_01660000, "atomic.b.and.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.and.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
- INSTR_6XX(c0360000_0365c800, "atomic.b.max.typed.1d.u32.1.imm r0.x, r0.w, 0"), /* atomic.b.max.g.u32.1d.mode0.base0 r0.x,r0.w,0 */
-
- /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.fragment.sampler2d */
- INSTR_6XX(a0c01f04_0cc00005, "sam (f32)(xyzw)r1.x, r0.z, s#6, t#6"),
- /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.uniform.fragment.sampler2d (looks like maybe the compiler didn't figure out */
- INSTR_6XX(a0c81f07_0100000b, "sam.s2en (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode0 (f32)(xyzw)r1.w, r1.y, hr2.x */
- /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2d */
- INSTR_6XX(a0c81f07_8100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x", .parse_fail=true), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */
-
- /* NonUniform: */
- /* dEQP-VK.descriptor_indexing.storage_buffer */
- INSTR_6XX(c0260c0a_0a61b180, "ldib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.z, r1.z"),
- INSTR_6XX(d0260e0a_09677180, "(sy)stib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.y, r1.w"),
- /* dEQP-VK.descriptor_indexing.uniform_texel_buffer */
- INSTR_6XX(a0481f00_40000405, "isaml.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.z, r0.x"),
- /* dEQP-VK.descriptor_indexing.storage_image */
- INSTR_6XX(d0360c04_02640b80, "(sy)atomic.b.add.typed.2d.u32.1.nonuniform.base0 r1.x, r0.z, r1.z"),
- /* dEQP-VK.descriptor_indexing.sampler */
- INSTR_6XX(a0c81f00_40000005, "sam.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.x"),
-
- /* Custom test since we've never seen the blob emit these. */
- INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"),
- INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"),
-
- /* cat7 */
-
- /* dEQP-VK.compute.basic.ssbo_local_barrier_single_invocation */
- INSTR_6XX(e0fa0000_00000000, "fence.g.l.r.w"),
- INSTR_6XX(e09a0000_00000000, "fence.r.w"),
- INSTR_6XX(f0420000_00000000, "(sy)bar.g"),
+ INSTR_6XX(d5c60003_03008001, "(sy)atomic.max.untyped.1d.u32.1.l r0.w, l[r0.z], r0.w"),
+
+ /* Bindless atomic: */
+ INSTR_6XX(c03a0003_01640000, "atomic.b.add.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.add.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
+ INSTR_6XX(c03a0003_01660000, "atomic.b.and.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.and.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
+ INSTR_6XX(c0360000_0365c800, "atomic.b.max.typed.1d.u32.1.imm r0.x, r0.w, 0"), /* atomic.b.max.g.u32.1d.mode0.base0 r0.x,r0.w,0 */
+
+ /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.fragment.sampler2d */
+ INSTR_6XX(a0c01f04_0cc00005, "sam (f32)(xyzw)r1.x, r0.z, s#6, t#6"),
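+ /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.uniform.fragment.sampler2d (looks like maybe the compiler didn't figure out that the sampler index is uniform, since no .uniform variant is emitted here -- original note was cut off, TODO confirm) */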
+ INSTR_6XX(a0c81f07_0100000b, "sam.s2en (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode0 (f32)(xyzw)r1.w, r1.y, hr2.x */
+ /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2d */
+ INSTR_6XX(a0c81f07_8100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x", .parse_fail=true), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */
+
+ /* NonUniform: */
+ /* dEQP-VK.descriptor_indexing.storage_buffer */
+ INSTR_6XX(c0260c0a_0a61b180, "ldib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.z, r1.z"),
+ INSTR_6XX(d0260e0a_09677180, "(sy)stib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.y, r1.w"),
+ /* dEQP-VK.descriptor_indexing.uniform_texel_buffer */
+ INSTR_6XX(a0481f00_40000405, "isaml.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.z, r0.x"),
+ /* dEQP-VK.descriptor_indexing.storage_image */
+ INSTR_6XX(d0360c04_02640b80, "(sy)atomic.b.add.typed.2d.u32.1.nonuniform.base0 r1.x, r0.z, r1.z"),
+ /* dEQP-VK.descriptor_indexing.sampler */
+ INSTR_6XX(a0c81f00_40000005, "sam.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.x"),
+
+ /* Custom test since we've never seen the blob emit these. */
+ INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"),
+ INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"),
+
+ /* cat7 */
+
+ /* dEQP-VK.compute.basic.ssbo_local_barrier_single_invocation */
+ INSTR_6XX(e0fa0000_00000000, "fence.g.l.r.w"),
+ INSTR_6XX(e09a0000_00000000, "fence.r.w"),
+ INSTR_6XX(f0420000_00000000, "(sy)bar.g"),
+/* clang-format on */
};
static void