freedreno: deduplicate a3xx+ disasm
author Rob Clark <robdclark@chromium.org>
Sat, 25 Jul 2020 17:28:42 +0000 (10:28 -0700)
committer Marge Bot <eric+marge@anholt.net>
Tue, 28 Jul 2020 09:45:08 +0000 (09:45 +0000)
Merge the extra tracking that is useful for generating stats from asm
(as opposed to ir), and for guesstimating things like inputs and outputs
(mostly useful for r/e) into ir3's version and drop cffdec's version.

There is a small change in disasm output for the decode tools, in that
it no longer prints the used consts, but rather just the max accessed
const.  This is the more useful piece of information, and avoids making
the shared regmask type big enough to deal with the const reg file.
Additional error checking for invalid regids causes crashdec to bail
out sooner when decoding memory that *might* hold valid instructions.
Also, crashdec no longer prints stats, because stats aren't very useful
when trying to decode random instruction memory (which might or might
not be valid instructions).

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6070>

24 files changed:
src/freedreno/.gitlab-ci/reference/crash.log
src/freedreno/.gitlab-ci/reference/fd-clouds.log
src/freedreno/.gitlab-ci/reference/glxgears-a420.log
src/freedreno/common/disasm.h [moved from src/freedreno/decode/disasm.h with 90% similarity]
src/freedreno/common/meson.build
src/freedreno/decode/cffdump.c
src/freedreno/decode/crashdec.c
src/freedreno/decode/disasm-a2xx.c
src/freedreno/decode/disasm-a3xx.c [deleted file]
src/freedreno/decode/instr-a3xx.h [deleted file]
src/freedreno/decode/meson.build
src/freedreno/decode/pgmdump.c
src/freedreno/decode/pgmdump2.c
src/freedreno/ir3/disasm-a3xx.c
src/freedreno/ir3/instr-a3xx.h
src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_print.c
src/freedreno/ir3/ir3_shader.c
src/freedreno/ir3/regmask.h
src/freedreno/ir3/tests/disasm.c
src/freedreno/meson.build
src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c
src/gallium/drivers/freedreno/disasm.h [deleted file]
src/gallium/drivers/freedreno/meson.build

index 34a06da..b1a7986 100644 (file)
@@ -3441,12 +3441,12 @@ shader-blocks:
        :3:0006:0014[7f64a39ax_609f35bcx] (sy)(jp)(rpt3)(ul)sad.s16 hr38.z, hc367.x, (neg)hc50.y, (r)hr39.w
        :7:0007:0018[f352cfcbx_ecad502bx] (sy)unknown(7,6).g
        :4:0008:0019[818209d0x_74021646x] (rpt1)unknown(4,12) hr52.x, (r)hc401.z
-       :6:0009:0021[c90972c0x_8e905e80x] (jp)stl.s16 l[hr48.x], hr976.x, 142
+       :6:0009:0021[c90972c0x_8e905e80x] (jp)stl.s16 l[hr48.x], hr16.x, 142
        :5:0010:0022[a4827242x_46248300x] gather4b.a (s8)(y)hr16.z, hr32.x, s#1, t#35
        :4:0011:0023[82342205x_cd064d21x] (rpt2)(ul)unknown(4,17) r1.y, (neg)c<a0.x + 289>
        :5:0012:0026[a923bf8bx_81f95908x] (jp)samb.3d.a.p (u32)(xyzw)r34.w, hr33.x, hr43.x, s#15, t#64
-       :1:0013:0027[3dda8123x_a0d91ccdx] (sy)(jp)(rpt1)cov.u8u16 (even)(pos_infinity)hr<a0.x + 35>, 0xa0d91ccd
-../src/freedreno/decode/instr-a3xx.h:979: is_cat6_legacy: Assertion `instr->cat6.opc == 0' failed.
+
+../src/freedreno/ir3/disasm-a3xx.c:173: regmask_set: Assertion `num < MAX_REG' failed.
        -----------------------------------------------
        8192 (0x2000) bytes
        000000: a018c54a 0600e824 11fa09c3 edc6145b     |J...$.......[...|
@@ -4124,17 +4124,8 @@ shader-blocks:
       size: 2048
        :2:0000:0000[40846422x_d81251c5x] (sat)(ul)sign.f r8.z, (neg)hc113.y
        :4:0001:0001[938a16e2x_520c369ax] (sy)(ss)(sat)(rpt2)unknown(4,28) hr56.z, -358
-       :1:0002:0004[200a00c1x_094864d2x] cov.u16f16 hr<a0.x + 193>, hr308.z
-       :2:0003:0005[44109084x_4a201507x] (ss)unknown(2,32) (ei)r33.x, c321.w, (neg)r<a0.x + 544>
-       :4:0004:0006[882fadabx_14a391b1x] (jp)(sat)(rpt1)(ul)rsq hr42.w, (abs)(r)hc108.y
-       :3:0005:0008[6060f068x_7106601ax] (ss)(ul)mad.u16 r26.x, (neg)hr6.z, (neg)hc48.y, (r)hc65.z
-       :3:0006:0009[60ed4212x_02900201x] (rpt2)madsh.u16 hr4.z, r128.y, r54.z, r164.x
-       :0:0007:0012[005b6589x_8a054280x] (eq)(rpt5)bkt #17024
-       :3:0008:0018[7cebfff7x_dbae7deex] (sy)(ss)(jp)(sat)(rpt3)(ul)sel.b32 a3.x, (neg)(r)c891.z, (neg)c53.w, (neg)c747.z
-       :5:0009:0022[aff86b27x_fd7472ffx] (jp)unknown(5,31).o.p.base4 (u8)(xyw)hr9.w, r14.y, a1.x
-       :0:0010:0023[0ed959d7x_6d7a21a4x] (ss)(jp)(eq)(rpt1)unknown(0,13)
-       :2:0011:0025[445a8ebex_8d6e703bx] (sat)(rpt2)cmpv.s.gt (ei)r47.z, (neg)(r)59, (abs)(r)c<a0.x + 366>
-../src/freedreno/decode/instr-a3xx.h:979: is_cat6_legacy: Assertion `instr->cat6.opc == 0' failed.
+       :1:0002:0004[200a00c1x_094864d2x] cov.u16f16 hr<a0.x + 193>, 
+../src/freedreno/ir3/disasm-a3xx.c:185: regmask_get: Assertion `num < MAX_REG' failed.
        -----------------------------------------------
        8192 (0x2000) bytes
        000000: d81251c5 40846422 520c369a 938a16e2     |.Q.."d.@.6.R....|
@@ -4653,1040 +4644,8 @@ shader-blocks:
       size: 2048
        :0:0000:0000[00000000x_00003002x] nop
        :0:0001:0001[00000000x_00000000x] nop
-       :6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0003:0003[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0004:0004[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0005:0005[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0006:0006[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0007:0007[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0008:0008[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0009:0009[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0010:0010[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0011:0011[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0012:0012[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0013:0013[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0014:0014[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0015:0015[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0016:0016[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0017:0017[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0018:0018[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0019:0019[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0020:0020[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0021:0021[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0022:0022[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0023:0023[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0024:0024[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0025:0025[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0026:0026[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0027:0027[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0028:0028[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0029:0029[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0030:0030[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0031:0031[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0032:0032[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0033:0033[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0034:0034[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0035:0035[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0036:0036[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0037:0037[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0038:0038[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0039:0039[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0040:0040[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0041:0041[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0042:0042[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0043:0043[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0044:0044[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0045:0045[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0046:0046[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0047:0047[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0048:0048[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0049:0049[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0050:0050[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0051:0051[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0052:0052[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0053:0053[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0054:0054[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0055:0055[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0056:0056[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0057:0057[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0058:0058[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0059:0059[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0060:0060[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0061:0061[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0062:0062[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0063:0063[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0064:0064[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0065:0065[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0066:0066[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0067:0067[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0068:0068[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0069:0069[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0070:0070[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0071:0071[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0072:0072[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0073:0073[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0074:0074[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0075:0075[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0076:0076[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0077:0077[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0078:0078[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0079:0079[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0080:0080[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0081:0081[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0082:0082[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0083:0083[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0084:0084[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0085:0085[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0086:0086[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0087:0087[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0088:0088[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0089:0089[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0090:0090[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0091:0091[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0092:0092[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0093:0093[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0094:0094[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0095:0095[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0096:0096[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0097:0097[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0098:0098[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0099:0099[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0100:0100[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0101:0101[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0102:0102[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0103:0103[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0104:0104[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0105:0105[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0106:0106[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0107:0107[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0108:0108[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0109:0109[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0110:0110[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0111:0111[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0112:0112[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0113:0113[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0114:0114[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0115:0115[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0116:0116[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0117:0117[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0118:0118[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0119:0119[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0120:0120[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0121:0121[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0122:0122[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0123:0123[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0124:0124[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0125:0125[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0126:0126[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0127:0127[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0128:0128[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0129:0129[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0130:0130[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0131:0131[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0132:0132[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0133:0133[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0134:0134[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0135:0135[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0136:0136[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0137:0137[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0138:0138[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0139:0139[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0140:0140[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0141:0141[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0142:0142[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0143:0143[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0144:0144[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0145:0145[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0146:0146[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0147:0147[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0148:0148[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0149:0149[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0150:0150[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0151:0151[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0152:0152[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0153:0153[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0154:0154[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0155:0155[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0156:0156[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0157:0157[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0158:0158[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0159:0159[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0160:0160[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0161:0161[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0162:0162[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0163:0163[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0164:0164[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0165:0165[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0166:0166[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0167:0167[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0168:0168[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0169:0169[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0170:0170[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0171:0171[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0172:0172[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0173:0173[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0174:0174[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0175:0175[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0176:0176[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0177:0177[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0178:0178[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0179:0179[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0180:0180[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0181:0181[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0182:0182[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0183:0183[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0184:0184[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0185:0185[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0186:0186[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0187:0187[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0188:0188[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0189:0189[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0190:0190[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0191:0191[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0192:0192[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0193:0193[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0194:0194[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0195:0195[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0196:0196[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0197:0197[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0198:0198[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0199:0199[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0200:0200[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0201:0201[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0202:0202[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0203:0203[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0204:0204[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0205:0205[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0206:0206[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0207:0207[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0208:0208[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0209:0209[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0210:0210[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0211:0211[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0212:0212[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0213:0213[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0214:0214[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0215:0215[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0216:0216[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0217:0217[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0218:0218[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0219:0219[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0220:0220[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0221:0221[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0222:0222[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0223:0223[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0224:0224[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0225:0225[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0226:0226[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0227:0227[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0228:0228[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0229:0229[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0230:0230[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0231:0231[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0232:0232[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0233:0233[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0234:0234[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0235:0235[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0236:0236[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0237:0237[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0238:0238[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0239:0239[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0240:0240[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0241:0241[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0242:0242[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0243:0243[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0244:0244[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0245:0245[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0246:0246[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0247:0247[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0248:0248[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0249:0249[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0250:0250[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0251:0251[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0252:0252[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0253:0253[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0254:0254[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0255:0255[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[r45.z], 222
-       :6:0256:0256[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0257:0257[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0258:0258[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0259:0259[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0260:0260[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0261:0261[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0262:0262[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0263:0263[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0264:0264[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0265:0265[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0266:0266[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0267:0267[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0268:0268[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0269:0269[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0270:0270[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0271:0271[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0272:0272[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0273:0273[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0274:0274[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0275:0275[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0276:0276[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0277:0277[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0278:0278[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0279:0279[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0280:0280[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0281:0281[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0282:0282[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0283:0283[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0284:0284[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0285:0285[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0286:0286[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0287:0287[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0288:0288[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0289:0289[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0290:0290[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0291:0291[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0292:0292[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0293:0293[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0294:0294[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0295:0295[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0296:0296[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0297:0297[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0298:0298[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0299:0299[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0300:0300[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0301:0301[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0302:0302[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0303:0303[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0304:0304[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0305:0305[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0306:0306[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0307:0307[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0308:0308[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0309:0309[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0310:0310[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0311:0311[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0312:0312[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0313:0313[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0314:0314[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0315:0315[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0316:0316[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0317:0317[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0318:0318[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0319:0319[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0320:0320[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0321:0321[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0322:0322[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0323:0323[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0324:0324[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0325:0325[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0326:0326[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0327:0327[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0328:0328[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0329:0329[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0330:0330[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0331:0331[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0332:0332[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0333:0333[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0334:0334[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0335:0335[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0336:0336[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0337:0337[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0338:0338[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0339:0339[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0340:0340[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0341:0341[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0342:0342[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0343:0343[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0344:0344[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0345:0345[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0346:0346[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0347:0347[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0348:0348[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0349:0349[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0350:0350[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0351:0351[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0352:0352[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0353:0353[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0354:0354[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0355:0355[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0356:0356[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0357:0357[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0358:0358[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0359:0359[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0360:0360[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0361:0361[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0362:0362[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0363:0363[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0364:0364[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0365:0365[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0366:0366[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0367:0367[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0368:0368[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0369:0369[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0370:0370[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0371:0371[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0372:0372[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0373:0373[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0374:0374[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0375:0375[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0376:0376[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0377:0377[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0378:0378[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0379:0379[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0380:0380[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0381:0381[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0382:0382[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0383:0383[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0384:0384[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0385:0385[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0386:0386[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0387:0387[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0388:0388[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0389:0389[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0390:0390[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0391:0391[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0392:0392[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0393:0393[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0394:0394[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0395:0395[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0396:0396[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0397:0397[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0398:0398[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0399:0399[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0400:0400[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0401:0401[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0402:0402[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0403:0403[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0404:0404[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0405:0405[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0406:0406[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0407:0407[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0408:0408[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0409:0409[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0410:0410[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0411:0411[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0412:0412[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0413:0413[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0414:0414[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0415:0415[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0416:0416[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0417:0417[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0418:0418[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0419:0419[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0420:0420[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0421:0421[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0422:0422[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0423:0423[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0424:0424[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0425:0425[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0426:0426[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0427:0427[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0428:0428[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0429:0429[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0430:0430[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0431:0431[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0432:0432[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0433:0433[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0434:0434[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0435:0435[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0436:0436[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0437:0437[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0438:0438[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0439:0439[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0440:0440[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0441:0441[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0442:0442[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0443:0443[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0444:0444[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0445:0445[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0446:0446[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0447:0447[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0448:0448[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0449:0449[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0450:0450[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0451:0451[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0452:0452[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0453:0453[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0454:0454[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0455:0455[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0456:0456[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0457:0457[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0458:0458[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0459:0459[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0460:0460[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0461:0461[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0462:0462[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0463:0463[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0464:0464[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0465:0465[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0466:0466[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0467:0467[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0468:0468[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0469:0469[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0470:0470[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0471:0471[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0472:0472[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0473:0473[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0474:0474[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0475:0475[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0476:0476[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0477:0477[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0478:0478[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0479:0479[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0480:0480[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0481:0481[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0482:0482[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0483:0483[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0484:0484[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0485:0485[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0486:0486[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0487:0487[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0488:0488[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0489:0489[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0490:0490[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0491:0491[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0492:0492[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0493:0493[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0494:0494[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0495:0495[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0496:0496[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0497:0497[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0498:0498[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0499:0499[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0500:0500[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0501:0501[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0502:0502[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0503:0503[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0504:0504[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0505:0505[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0506:0506[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0507:0507[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0508:0508[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0509:0509[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0510:0510[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :6:0511:0511[deafbeadx_deafbeadx] (sy)(jp)atomic.xor.typed.4d.s8.4.l hr43.y, l[r47.z], 222
-       :0:0512:0512[00500240x_00024000x] (rpt2)nop
-       :0:0513:0515[00402020x_00000000x] nop
-       :0:0514:0516[00000040x_00001000x] nop
-       :0:0515:0517[00510401x_00024020x] (eq)(rpt4)nop
-       :0:0516:0522[00100080x_00000008x] nop
-       :0:0517:0523[00000044x_00002080x] nop
-       :0:0518:0524[00001000x_00000000x] (ss)nop
-       :0:0519:0525[00200000x_00000008x] nop
-       :0:0520:0526[00000044x_00048110x] nop
-       :0:0521:0527[00000040x_00508000x] nop
-       :0:0522:0528[00010200x_00020044x] (eq)(rpt2)nop
-       :0:0523:0531[00000000x_00201014x] nop
-       :0:0524:0532[00012100x_00101100x] (eq)(rpt1)nop
-       :0:0525:0534[00000012x_00005000x] nop
-       :0:0526:0535[00000010x_00005000x] nop
-       :0:0527:0536[00040000x_00000020x] nop
-       :0:0528:0537[00002101x_00082514x] (rpt1)nop
-       :0:0529:0539[00000000x_00210020x] nop
-       :0:0530:0540[00440004x_00010002x] nop
-       :0:0531:0541[00000002x_00000250x] nop
-       :0:0532:0542[00000040x_00100000x] nop
-       :0:0533:0543[00000000x_00020014x] nop
-       :0:0534:0544[000400a0x_00050020x] nop
-       :0:0535:0545[00100000x_00000000x] nop
-       :0:0536:0546[00000000x_00044081x] nop
-       :0:0537:0547[00000000x_00000000x] nop
-       :0:0538:0548[00200048x_00000100x] nop
-       :0:0539:0549[00080020x_00000000x] nop
-       :0:0540:0550[00200002x_00200001x] nop
-       :0:0541:0551[002000a4x_00000404x] nop
-       :0:0542:0552[00440246x_00000004x] (rpt2)nop
-       :0:0543:0555[0008c040x_00442000x] nop
-       :0:0544:0556[002112a0x_00200000x] (ss)(eq)(rpt2)nop
-       :0:0545:0559[00000000x_00000000x] nop
-       :0:0546:0560[00000240x_00400001x] (rpt2)nop
-       :0:0547:0563[00000000x_00040400x] nop
-       :0:0548:0564[0000a100x_00104010x] (rpt1)nop
-       :0:0549:0566[00008480x_00002001x] (rpt4)nop
-       :0:0550:0571[00000001x_00000040x] nop
-       :0:0551:0572[00040001x_00040400x] nop
-       :0:0552:0573[00200000x_00040600x] nop
-       :0:0553:0574[00000100x_00100000x] (rpt1)nop
-       :0:0554:0576[00504180x_0020a200x] (rpt1)nop
-       :0:0555:0578[00000000x_00000000x] nop
-       :0:0556:0579[00000024x_00004000x] nop
-       :0:0557:0580[00200000x_00100008x] nop
-       :0:0558:0581[00010080x_00000000x] (eq)nop
-       :0:0559:0582[00080000x_00000000x] nop
-       :0:0560:0583[00084000x_00500400x] nop
-       :0:0561:0584[00004000x_00008000x] nop
-       :0:0562:0585[00200000x_00000300x] nop
-       :0:0563:0586[00000042x_00020001x] nop
-       :0:0564:0587[00005600x_00400088x] (ss)(rpt6)nop
-       :0:0565:0594[00000002x_00000000x] nop
-       :0:0566:0595[0002005ex_00400008x] bkt #8
-       :0:0567:0596[00020020x_00200000x] bkt #0
-       :0:0568:0597[001e0414x_00055480x] (rpt4)bkt #21632
-       :0:0569:0602[00000000x_00000000x] nop
-       :0:0570:0603[00000442x_00000480x] (rpt4)nop
-       :0:0571:0608[00000200x_00080000x] (rpt2)nop
-       :0:0572:0611[00520000x_00600400x] bkt #1024
-       :0:0573:0612[00001200x_00000008x] (ss)(rpt2)nop
-       :0:0574:0615[00400114x_00201000x] (rpt1)nop
-       :0:0575:0617[00110100x_00100002x] (eq)(rpt1)nop
-       :0:0576:0619[00404200x_00200683x] (rpt2)nop
-       :0:0577:0622[00000090x_00000004x] nop
-       :0:0578:0623[00502000x_00002000x] nop
-       :0:0579:0624[00000004x_00000020x] nop
-       :0:0580:0625[00103100x_00600010x] (ss)(rpt1)nop
-       :0:0581:0627[00000002x_00000010x] nop
-       :0:0582:0628[00004000x_00021200x] nop
-       :0:0583:0629[00000000x_00000000x] nop
-       :0:0584:0630[00201400x_0010220ax] (ss)(rpt4)nop
-       :0:0585:0635[00000000x_00030000x] nop
-       :0:0586:0636[00080040x_00400000x] nop
-       :0:0587:0637[00000080x_00000002x] nop
-       :0:0588:0638[00000580x_00000400x] (rpt5)nop
-       :0:0589:0644[00000200x_00000022x] (rpt2)nop
-       :0:0590:0647[00080000x_00300042x] nop
-       :0:0591:0648[00008000x_00040200x] nop
-       :0:0592:0649[00000000x_00040000x] nop
-       :0:0593:0650[0012008ax_00000010x] bkt #16
-       :0:0594:0651[00000100x_00000000x] (rpt1)nop
-       :0:0595:0653[00010000x_00010018x] (eq)nop
-       :0:0596:0654[00500011x_00440020x] nop
-       :0:0597:0655[00100000x_00000000x] nop
-       :0:0598:0656[00008200x_0004020cx] (rpt2)nop
-       :0:0599:0659[00000400x_00100010x] (rpt4)nop
-       :0:0600:0664[00000004x_00118000x] nop
-       :0:0601:0665[00000002x_00004200x] nop
-       :0:0602:0666[00026300x_00000210x] (rpt3)bkt #528
-       :0:0603:0670[0000a002x_00000040x] nop
-       :0:0604:0671[00081100x_00004082x] (ss)(rpt1)nop
-       :0:0605:0673[00000008x_00210000x] nop
-       :0:0606:0674[00020004x_00020000x] bkt #0
-       :0:0607:0675[00020000x_00064108x] bkt #16648
-       :0:0608:0676[00000084x_00020000x] nop
-       :0:0609:0677[00000181x_00000430x] (rpt1)nop
-       :0:0610:0679[001c8100x_00100002x] (rpt1)nop
-       :0:0611:0681[00000000x_00200020x] nop
-       :0:0612:0682[00100081x_00002000x] nop
-       :0:0613:0683[00000000x_00000008x] nop
-       :0:0614:0684[00009420x_00000024x] (ss)(rpt4)nop
-       :0:0615:0689[00000100x_00002010x] (rpt1)nop
-       :0:0616:0691[00004188x_00000000x] (rpt1)nop
-       :0:0617:0693[00100000x_00002000x] nop
-       :0:0618:0694[00120102x_00040000x] (rpt1)bkt #0
-       :0:0619:0696[00040002x_00000000x] nop
-       :0:0620:0697[00224200x_00210201x] (rpt2)bkt #513
-       :0:0621:0700[00000200x_00040000x] (rpt2)nop
-       :0:0622:0703[0000000cx_00000000x] nop
-       :0:0623:0704[00000000x_00005000x] nop
-       :0:0624:0705[00082208x_00010200x] (rpt2)nop
-       :0:0625:0708[00194011x_00000000x] (eq)nop
-       :0:0626:0709[00012100x_00000502x] (eq)(rpt1)nop
-       :0:0627:0711[00000240x_00040050x] (rpt2)nop
-       :0:0628:0714[00080211x_00004180x] (rpt2)nop
-       :0:0629:0717[00000000x_00001008x] nop
-       :0:0630:0718[00020490x_002004a0x] (rpt4)bkt #1184
-       :0:0631:0723[00210004x_00001080x] (eq)nop
-       :0:0632:0724[00000000x_00300040x] nop
-       :0:0633:0725[00008002x_00000020x] nop
-       :0:0634:0726[00000000x_00041098x] nop
-       :0:0635:0727[002000a0x_00000000x] nop
-       :0:0636:0728[00000000x_000c0400x] nop
-       :0:0637:0729[00000401x_00000402x] (rpt4)nop
-       :0:0638:0734[00002000x_00200400x] nop
-       :0:0639:0735[00000101x_00001000x] (rpt1)nop
-       :0:0640:0737[00500240x_00024000x] (rpt2)nop
-       :0:0641:0740[00402020x_00000000x] nop
-       :0:0642:0741[00000040x_00001000x] nop
-       :0:0643:0742[00510401x_00024020x] (eq)(rpt4)nop
-       :0:0644:0747[00100080x_00000008x] nop
-       :0:0645:0748[00000044x_00002080x] nop
-       :0:0646:0749[00001000x_00000000x] (ss)nop
-       :0:0647:0750[00200000x_00000008x] nop
-       :0:0648:0751[00000044x_00048110x] nop
-       :0:0649:0752[00000040x_00508000x] nop
-       :0:0650:0753[00010200x_00020044x] (eq)(rpt2)nop
-       :0:0651:0756[00000000x_00201014x] nop
-       :0:0652:0757[00012100x_00101100x] (eq)(rpt1)nop
-       :0:0653:0759[00000012x_00005000x] nop
-       :0:0654:0760[00000010x_00005000x] nop
-       :0:0655:0761[00040000x_00000020x] nop
-       :0:0656:0762[00002101x_00082514x] (rpt1)nop
-       :0:0657:0764[00000000x_00210020x] nop
-       :0:0658:0765[00440004x_00010002x] nop
-       :0:0659:0766[00000002x_00000250x] nop
-       :0:0660:0767[00000040x_00100000x] nop
-       :0:0661:0768[00000000x_00020014x] nop
-       :0:0662:0769[000400a0x_00050020x] nop
-       :0:0663:0770[00100000x_00000000x] nop
-       :0:0664:0771[00000000x_00044081x] nop
-       :0:0665:0772[00000000x_00000000x] nop
-       :0:0666:0773[00200048x_00000100x] nop
-       :0:0667:0774[00080020x_00000000x] nop
-       :0:0668:0775[00200002x_00200001x] nop
-       :0:0669:0776[002000a4x_00000404x] nop
-       :0:0670:0777[00440246x_00000004x] (rpt2)nop
-       :0:0671:0780[0008c040x_00442000x] nop
-       :0:0672:0781[002112a0x_00200000x] (ss)(eq)(rpt2)nop
-       :0:0673:0784[00000000x_00000000x] nop
-       :0:0674:0785[00000240x_00400001x] (rpt2)nop
-       :0:0675:0788[00000000x_00040400x] nop
-       :0:0676:0789[0000a100x_00104010x] (rpt1)nop
-       :0:0677:0791[00008480x_00002001x] (rpt4)nop
-       :0:0678:0796[00000001x_00000040x] nop
-       :0:0679:0797[00040001x_00040400x] nop
-       :0:0680:0798[00200000x_00040600x] nop
-       :0:0681:0799[00000100x_00100000x] (rpt1)nop
-       :0:0682:0801[00504180x_0020a200x] (rpt1)nop
-       :0:0683:0803[00000000x_00000000x] nop
-       :0:0684:0804[00000024x_00004000x] nop
-       :0:0685:0805[00200000x_00100008x] nop
-       :0:0686:0806[00010080x_00000000x] (eq)nop
-       :0:0687:0807[00080000x_00000000x] nop
-       :0:0688:0808[00084000x_00500400x] nop
-       :0:0689:0809[00004000x_00008000x] nop
-       :0:0690:0810[00200000x_00000300x] nop
-       :0:0691:0811[00000042x_00020001x] nop
-       :0:0692:0812[00005600x_00400088x] (ss)(rpt6)nop
-       :0:0693:0819[00000002x_00000000x] nop
-       :0:0694:0820[0002005ex_00400008x] bkt #8
-       :0:0695:0821[00020020x_00200000x] bkt #0
-       :0:0696:0822[001e0414x_00055480x] (rpt4)bkt #21632
-       :0:0697:0827[00000000x_00000000x] nop
-       :0:0698:0828[00000442x_00000480x] (rpt4)nop
-       :0:0699:0833[00000200x_00080000x] (rpt2)nop
-       :0:0700:0836[00520000x_00600400x] bkt #1024
-       :0:0701:0837[00001200x_00000008x] (ss)(rpt2)nop
-       :0:0702:0840[00400114x_00201000x] (rpt1)nop
-       :0:0703:0842[00110100x_00100002x] (eq)(rpt1)nop
-       :0:0704:0844[00404200x_00200683x] (rpt2)nop
-       :0:0705:0847[00000090x_00000004x] nop
-       :0:0706:0848[00502000x_00002000x] nop
-       :0:0707:0849[00000004x_00000020x] nop
-       :0:0708:0850[00103100x_00600010x] (ss)(rpt1)nop
-       :0:0709:0852[00000002x_00000010x] nop
-       :0:0710:0853[00004000x_00021200x] nop
-       :0:0711:0854[00000000x_00000000x] nop
-       :0:0712:0855[00201400x_0010220ax] (ss)(rpt4)nop
-       :0:0713:0860[00000000x_00030000x] nop
-       :0:0714:0861[00080040x_00400000x] nop
-       :0:0715:0862[00000080x_00000002x] nop
-       :0:0716:0863[00000580x_00000400x] (rpt5)nop
-       :0:0717:0869[00000200x_00000022x] (rpt2)nop
-       :0:0718:0872[00080000x_00300042x] nop
-       :0:0719:0873[00008000x_00040200x] nop
-       :0:0720:0874[00000000x_00040000x] nop
-       :0:0721:0875[0012008ax_00000010x] bkt #16
-       :0:0722:0876[00000100x_00000000x] (rpt1)nop
-       :0:0723:0878[00010000x_00010018x] (eq)nop
-       :0:0724:0879[00500011x_00440020x] nop
-       :0:0725:0880[00100000x_00000000x] nop
-       :0:0726:0881[00008200x_0004020cx] (rpt2)nop
-       :0:0727:0884[00000400x_00100010x] (rpt4)nop
-       :0:0728:0889[00000004x_00118000x] nop
-       :0:0729:0890[00000002x_00004200x] nop
-       :0:0730:0891[00026300x_00000210x] (rpt3)bkt #528
-       :0:0731:0895[0000a002x_00000040x] nop
-       :0:0732:0896[00081100x_00004082x] (ss)(rpt1)nop
-       :0:0733:0898[00000008x_00210000x] nop
-       :0:0734:0899[00020004x_00020000x] bkt #0
-       :0:0735:0900[00020000x_00064108x] bkt #16648
-       :0:0736:0901[00000084x_00020000x] nop
-       :0:0737:0902[00000181x_00000430x] (rpt1)nop
-       :0:0738:0904[001c8100x_00100002x] (rpt1)nop
-       :0:0739:0906[00000000x_00200020x] nop
-       :0:0740:0907[00100081x_00002000x] nop
-       :0:0741:0908[00000000x_00000008x] nop
-       :0:0742:0909[00009420x_00000024x] (ss)(rpt4)nop
-       :0:0743:0914[00000100x_00002010x] (rpt1)nop
-       :0:0744:0916[00004188x_00000000x] (rpt1)nop
-       :0:0745:0918[00100000x_00002000x] nop
-       :0:0746:0919[00120102x_00040000x] (rpt1)bkt #0
-       :0:0747:0921[00040002x_00000000x] nop
-       :0:0748:0922[00224200x_00210201x] (rpt2)bkt #513
-       :0:0749:0925[00000200x_00040000x] (rpt2)nop
-       :0:0750:0928[0000000cx_00000000x] nop
-       :0:0751:0929[00000000x_00005000x] nop
-       :0:0752:0930[00082208x_00010200x] (rpt2)nop
-       :0:0753:0933[00194011x_00000000x] (eq)nop
-       :0:0754:0934[00012100x_00000502x] (eq)(rpt1)nop
-       :0:0755:0936[00000240x_00040050x] (rpt2)nop
-       :0:0756:0939[00080211x_00004180x] (rpt2)nop
-       :0:0757:0942[00000000x_00001008x] nop
-       :0:0758:0943[00020490x_002004a0x] (rpt4)bkt #1184
-       :0:0759:0948[00210004x_00001080x] (eq)nop
-       :0:0760:0949[00000000x_00300040x] nop
-       :0:0761:0950[00008002x_00000020x] nop
-       :0:0762:0951[00000000x_00041098x] nop
-       :0:0763:0952[002000a0x_00000000x] nop
-       :0:0764:0953[00000000x_000c0400x] nop
-       :0:0765:0954[00000401x_00000402x] (rpt4)nop
-       :0:0766:0959[00002000x_00200400x] nop
-       :0:0767:0960[00000101x_00001000x] (rpt1)nop
-       :0:0768:0962[00000000x_00000000x] nop
-       :0:0769:0963[00000000x_00000000x] nop
-       :0:0770:0964[00000000x_00000000x] nop
-       :0:0771:0965[00000000x_00000000x] nop
-       :0:0772:0966[00000000x_00000000x] nop
-       :0:0773:0967[00000000x_00000000x] nop
-       :0:0774:0968[00000000x_00000000x] nop
-       :0:0775:0969[00000000x_00000000x] nop
-       :0:0776:0970[00000000x_00000000x] nop
-       :0:0777:0971[00000000x_00000000x] nop
-       :0:0778:0972[00000000x_00000000x] nop
-       :0:0779:0973[00000000x_00000000x] nop
-       :0:0780:0974[00000000x_00000000x] nop
-       :0:0781:0975[00000000x_00000000x] nop
-       :0:0782:0976[00000000x_00000000x] nop
-       :0:0783:0977[00000000x_00000000x] nop
-       :0:0784:0978[00000000x_00000000x] nop
-       :0:0785:0979[00000000x_00000000x] nop
-       :0:0786:0980[00000000x_00000000x] nop
-       :0:0787:0981[00000000x_00000000x] nop
-       :0:0788:0982[00000000x_00000000x] nop
-       :0:0789:0983[00000000x_00000000x] nop
-       :0:0790:0984[00000000x_00000000x] nop
-       :0:0791:0985[00000000x_00000000x] nop
-       :0:0792:0986[00000000x_00000000x] nop
-       :0:0793:0987[00000000x_00000000x] nop
-       :0:0794:0988[00000000x_00000000x] nop
-       :0:0795:0989[00000000x_00000000x] nop
-       :0:0796:0990[00000000x_00000000x] nop
-       :0:0797:0991[00000000x_00000000x] nop
-       :0:0798:0992[00000000x_00000000x] nop
-       :0:0799:0993[00000000x_00000000x] nop
-       :0:0800:0994[00000000x_00000000x] nop
-       :0:0801:0995[00000000x_00000000x] nop
-       :0:0802:0996[00000000x_00000000x] nop
-       :0:0803:0997[00000000x_00000000x] nop
-       :0:0804:0998[00000000x_00000000x] nop
-       :0:0805:0999[00000000x_00000000x] nop
-       :0:0806:1000[00000000x_00000000x] nop
-       :0:0807:1001[00000000x_00000000x] nop
-       :0:0808:1002[00000000x_00000000x] nop
-       :0:0809:1003[00000000x_00000000x] nop
-       :0:0810:1004[00000000x_00000000x] nop
-       :0:0811:1005[00000000x_00000000x] nop
-       :0:0812:1006[00000000x_00000000x] nop
-       :0:0813:1007[00000000x_00000000x] nop
-       :0:0814:1008[00000000x_00000000x] nop
-       :0:0815:1009[00000000x_00000000x] nop
-       :0:0816:1010[00000000x_00000000x] nop
-       :0:0817:1011[00000000x_00000000x] nop
-       :0:0818:1012[00000000x_00000000x] nop
-       :0:0819:1013[00000000x_00000000x] nop
-       :0:0820:1014[00000000x_00000000x] nop
-       :0:0821:1015[00000000x_00000000x] nop
-       :0:0822:1016[00000000x_00000000x] nop
-       :0:0823:1017[00000000x_00000000x] nop
-       :0:0824:1018[00000000x_00000000x] nop
-       :0:0825:1019[00000000x_00000000x] nop
-       :0:0826:1020[00000000x_00000000x] nop
-       :0:0827:1021[00000000x_00000000x] nop
-       :0:0828:1022[00000000x_00000000x] nop
-       :0:0829:1023[00000000x_00000000x] nop
-       :0:0830:1024[00000000x_00000000x] nop
-       :0:0831:1025[00000000x_00000000x] nop
-       :0:0832:1026[00000000x_00000000x] nop
-       :0:0833:1027[00000000x_00000000x] nop
-       :0:0834:1028[00000000x_00000000x] nop
-       :0:0835:1029[00000000x_00000000x] nop
-       :0:0836:1030[00000000x_00000000x] nop
-       :0:0837:1031[00000000x_00000000x] nop
-       :0:0838:1032[00000000x_00000000x] nop
-       :0:0839:1033[00000000x_00000000x] nop
-       :0:0840:1034[00000000x_00000000x] nop
-       :0:0841:1035[00000000x_00000000x] nop
-       :0:0842:1036[00000000x_00000000x] nop
-       :0:0843:1037[00000000x_00000000x] nop
-       :0:0844:1038[00000000x_00000000x] nop
-       :0:0845:1039[00000000x_00000000x] nop
-       :0:0846:1040[00000000x_00000000x] nop
-       :0:0847:1041[00000000x_00000000x] nop
-       :0:0848:1042[00000000x_00000000x] nop
-       :0:0849:1043[00000000x_00000000x] nop
-       :0:0850:1044[00000000x_00000000x] nop
-       :0:0851:1045[00000000x_00000000x] nop
-       :0:0852:1046[00000000x_00000000x] nop
-       :0:0853:1047[00000000x_00000000x] nop
-       :0:0854:1048[00000000x_00000000x] nop
-       :0:0855:1049[00000000x_00000000x] nop
-       :0:0856:1050[00000000x_00000000x] nop
-       :0:0857:1051[00000000x_00000000x] nop
-       :0:0858:1052[00000000x_00000000x] nop
-       :0:0859:1053[00000000x_00000000x] nop
-       :0:0860:1054[00000000x_00000000x] nop
-       :0:0861:1055[00000000x_00000000x] nop
-       :0:0862:1056[00000000x_00000000x] nop
-       :0:0863:1057[00000000x_00000000x] nop
-       :0:0864:1058[00000000x_00000000x] nop
-       :0:0865:1059[00000000x_00000000x] nop
-       :0:0866:1060[00000000x_00000000x] nop
-       :0:0867:1061[00000000x_00000000x] nop
-       :0:0868:1062[00000000x_00000000x] nop
-       :0:0869:1063[00000000x_00000000x] nop
-       :0:0870:1064[00000000x_00000000x] nop
-       :0:0871:1065[00000000x_00000000x] nop
-       :0:0872:1066[00000000x_00000000x] nop
-       :0:0873:1067[00000000x_00000000x] nop
-       :0:0874:1068[00000000x_00000000x] nop
-       :0:0875:1069[00000000x_00000000x] nop
-       :0:0876:1070[00000000x_00000000x] nop
-       :0:0877:1071[00000000x_00000000x] nop
-       :0:0878:1072[00000000x_00000000x] nop
-       :0:0879:1073[00000000x_00000000x] nop
-       :0:0880:1074[00000000x_00000000x] nop
-       :0:0881:1075[00000000x_00000000x] nop
-       :0:0882:1076[00000000x_00000000x] nop
-       :0:0883:1077[00000000x_00000000x] nop
-       :0:0884:1078[00000000x_00000000x] nop
-       :0:0885:1079[00000000x_00000000x] nop
-       :0:0886:1080[00000000x_00000000x] nop
-       :0:0887:1081[00000000x_00000000x] nop
-       :0:0888:1082[00000000x_00000000x] nop
-       :0:0889:1083[00000000x_00000000x] nop
-       :0:0890:1084[00000000x_00000000x] nop
-       :0:0891:1085[00000000x_00000000x] nop
-       :0:0892:1086[00000000x_00000000x] nop
-       :0:0893:1087[00000000x_00000000x] nop
-       :0:0894:1088[00000000x_00000000x] nop
-       :0:0895:1089[00000000x_00000000x] nop
-       :0:0896:1090[00000000x_00000000x] nop
-       :0:0897:1091[00000000x_00000000x] nop
-       :0:0898:1092[00000000x_00000000x] nop
-       :0:0899:1093[00000000x_00000000x] nop
-       :0:0900:1094[00000000x_00000000x] nop
-       :0:0901:1095[00000000x_00000000x] nop
-       :0:0902:1096[00000000x_00000000x] nop
-       :0:0903:1097[00000000x_00000000x] nop
-       :0:0904:1098[00000000x_00000000x] nop
-       :0:0905:1099[00000000x_00000000x] nop
-       :0:0906:1100[00000000x_00000000x] nop
-       :0:0907:1101[00000000x_00000000x] nop
-       :0:0908:1102[00000000x_00000000x] nop
-       :0:0909:1103[00000000x_00000000x] nop
-       :0:0910:1104[00000000x_00000000x] nop
-       :0:0911:1105[00000000x_00000000x] nop
-       :0:0912:1106[00000000x_00000000x] nop
-       :0:0913:1107[00000000x_00000000x] nop
-       :0:0914:1108[00000000x_00000000x] nop
-       :0:0915:1109[00000000x_00000000x] nop
-       :0:0916:1110[00000000x_00000000x] nop
-       :0:0917:1111[00000000x_00000000x] nop
-       :0:0918:1112[00000000x_00000000x] nop
-       :0:0919:1113[00000000x_00000000x] nop
-       :0:0920:1114[00000000x_00000000x] nop
-       :0:0921:1115[00000000x_00000000x] nop
-       :0:0922:1116[00000000x_00000000x] nop
-       :0:0923:1117[00000000x_00000000x] nop
-       :0:0924:1118[00000000x_00000000x] nop
-       :0:0925:1119[00000000x_00000000x] nop
-       :0:0926:1120[00000000x_00000000x] nop
-       :0:0927:1121[00000000x_00000000x] nop
-       :0:0928:1122[00000000x_00000000x] nop
-       :0:0929:1123[00000000x_00000000x] nop
-       :0:0930:1124[00000000x_00000000x] nop
-       :0:0931:1125[00000000x_00000000x] nop
-       :0:0932:1126[00000000x_00000000x] nop
-       :0:0933:1127[00000000x_00000000x] nop
-       :0:0934:1128[00000000x_00000000x] nop
-       :0:0935:1129[00000000x_00000000x] nop
-       :0:0936:1130[00000000x_00000000x] nop
-       :0:0937:1131[00000000x_00000000x] nop
-       :0:0938:1132[00000000x_00000000x] nop
-       :0:0939:1133[00000000x_00000000x] nop
-       :0:0940:1134[00000000x_00000000x] nop
-       :0:0941:1135[00000000x_00000000x] nop
-       :0:0942:1136[00000000x_00000000x] nop
-       :0:0943:1137[00000000x_00000000x] nop
-       :0:0944:1138[00000000x_00000000x] nop
-       :0:0945:1139[00000000x_00000000x] nop
-       :0:0946:1140[00000000x_00000000x] nop
-       :0:0947:1141[00000000x_00000000x] nop
-       :0:0948:1142[00000000x_00000000x] nop
-       :0:0949:1143[00000000x_00000000x] nop
-       :0:0950:1144[00000000x_00000000x] nop
-       :0:0951:1145[00000000x_00000000x] nop
-       :0:0952:1146[00000000x_00000000x] nop
-       :0:0953:1147[00000000x_00000000x] nop
-       :0:0954:1148[00000000x_00000000x] nop
-       :0:0955:1149[00000000x_00000000x] nop
-       :0:0956:1150[00000000x_00000000x] nop
-       :0:0957:1151[00000000x_00000000x] nop
-       :0:0958:1152[00000000x_00000000x] nop
-       :0:0959:1153[00000000x_00000000x] nop
-       :0:0960:1154[00000000x_00000000x] nop
-       :0:0961:1155[00000000x_00000000x] nop
-       :0:0962:1156[00000000x_00000000x] nop
-       :0:0963:1157[00000000x_00000000x] nop
-       :0:0964:1158[00000000x_00000000x] nop
-       :0:0965:1159[00000000x_00000000x] nop
-       :0:0966:1160[00000000x_00000000x] nop
-       :0:0967:1161[00000000x_00000000x] nop
-       :0:0968:1162[00000000x_00000000x] nop
-       :0:0969:1163[00000000x_00000000x] nop
-       :0:0970:1164[00000000x_00000000x] nop
-       :0:0971:1165[00000000x_00000000x] nop
-       :0:0972:1166[00000000x_00000000x] nop
-       :0:0973:1167[00000000x_00000000x] nop
-       :0:0974:1168[00000000x_00000000x] nop
-       :0:0975:1169[00000000x_00000000x] nop
-       :0:0976:1170[00000000x_00000000x] nop
-       :0:0977:1171[00000000x_00000000x] nop
-       :0:0978:1172[00000000x_00000000x] nop
-       :0:0979:1173[00000000x_00000000x] nop
-       :0:0980:1174[00000000x_00000000x] nop
-       :0:0981:1175[00000000x_00000000x] nop
-       :0:0982:1176[00000000x_00000000x] nop
-       :0:0983:1177[00000000x_00000000x] nop
-       :0:0984:1178[00000000x_00000000x] nop
-       :0:0985:1179[00000000x_00000000x] nop
-       :0:0986:1180[00000000x_00000000x] nop
-       :0:0987:1181[00000000x_00000000x] nop
-       :0:0988:1182[00000000x_00000000x] nop
-       :0:0989:1183[00000000x_00000000x] nop
-       :0:0990:1184[00000000x_00000000x] nop
-       :0:0991:1185[00000000x_00000000x] nop
-       :0:0992:1186[00000000x_00000000x] nop
-       :0:0993:1187[00000000x_00000000x] nop
-       :0:0994:1188[00000000x_00000000x] nop
-       :0:0995:1189[00000000x_00000000x] nop
-       :0:0996:1190[00000000x_00000000x] nop
-       :0:0997:1191[00000000x_00000000x] nop
-       :0:0998:1192[00000000x_00000000x] nop
-       :0:0999:1193[00000000x_00000000x] nop
-       :0:1000:1194[00000000x_00000000x] nop
-       :0:1001:1195[00000000x_00000000x] nop
-       :0:1002:1196[00000000x_00000000x] nop
-       :0:1003:1197[00000000x_00000000x] nop
-       :0:1004:1198[00000000x_00000000x] nop
-       :0:1005:1199[00000000x_00000000x] nop
-       :0:1006:1200[00000000x_00000000x] nop
-       :0:1007:1201[00000000x_00000000x] nop
-       :0:1008:1202[00000000x_00000000x] nop
-       :0:1009:1203[00000000x_00000000x] nop
-       :0:1010:1204[00000000x_00000000x] nop
-       :0:1011:1205[00000000x_00000000x] nop
-       :0:1012:1206[00000000x_00000000x] nop
-       :0:1013:1207[00000000x_00000000x] nop
-       :0:1014:1208[00000000x_00000000x] nop
-       :0:1015:1209[00000000x_00000000x] nop
-       :0:1016:1210[00000000x_00000000x] nop
-       :0:1017:1211[00000000x_00000000x] nop
-       :0:1018:1212[00000000x_00000000x] nop
-       :0:1019:1213[00000000x_00000000x] nop
-       :0:1020:1214[00000000x_00000000x] nop
-       :0:1021:1215[00000000x_00000000x] nop
-       :0:1022:1216[00000000x_00000000x] nop
-       :0:1023:1217[00000000x_00000000x] nop
-       Register Stats:
-       - used (half): 173 239 (cnt=2, max=173)
-       - used (full): 182 190 (cnt=2, max=190)
-       - used (merged): 173 239 364-365 380-381 (cnt=6, max=173)
-       - input (half): 173 239 (cnt=2, max=173)
-       - input (full): 182 190 (cnt=2, max=190)
-       - const (half): (cnt=0, max=0)
-       - const (full): (cnt=0, max=0)
-       - output (half): (cnt=0, max=0)  (estimated)
-       - output (full): (cnt=0, max=0)  (estimated)
-       - shaderdb: 1218 instructions, 658 nops, 560 non-nops, (1024 instlen), 44 half, 48 full
-       - shaderdb: 16 (ss), 510 (sy)
+       :6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l hr59.w, l[
+../src/freedreno/ir3/disasm-a3xx.c:173: regmask_set: Assertion `num < MAX_REG' failed.
        -----------------------------------------------
        8192 (0x2000) bytes
        000000: 00003002 00000000 00000000 00000000     |.0..............|
index 798c067..afb8e7f 100644 (file)
@@ -640,8 +640,8 @@ t4                                  write SP_VS_OBJ_START_LO (a81c)
                                                        - used (merged): (cnt=0, max=0)
                                                        - input (half): (cnt=0, max=0)
                                                        - input (full): (cnt=0, max=0)
-                                                       - const (half): (cnt=0, max=0)
-                                                       - const (full): (cnt=0, max=0)
+                                                       - max const: 0
+
                                                        - output (half): (cnt=0, max=0)  (estimated)
                                                        - output (full): (cnt=0, max=0)  (estimated)
                                                        - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
@@ -662,8 +662,8 @@ t7                                  opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
                                                - used (merged): (cnt=0, max=0)
                                                - input (half): (cnt=0, max=0)
                                                - input (full): (cnt=0, max=0)
-                                               - const (half): (cnt=0, max=0)
-                                               - const (full): (cnt=0, max=0)
+                                               - max const: 0
+
                                                - output (half): (cnt=0, max=0)  (estimated)
                                                - output (full): (cnt=0, max=0)  (estimated)
                                                - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
@@ -1110,8 +1110,8 @@ t7                        opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
                                - used (merged): (cnt=0, max=0)
                                - input (half): (cnt=0, max=0)
                                - input (full): (cnt=0, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
@@ -1955,8 +1955,8 @@ t4                                        write SP_VS_OBJ_START_LO (a81c)
                                                        - used (merged): (cnt=0, max=0)
                                                        - input (half): (cnt=0, max=0)
                                                        - input (full): (cnt=0, max=0)
-                                                       - const (half): (cnt=0, max=0)
-                                                       - const (full): (cnt=0, max=0)
+                                                       - max const: 0
+
                                                        - output (half): (cnt=0, max=0)  (estimated)
                                                        - output (full): (cnt=0, max=0)  (estimated)
                                                        - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
@@ -1977,8 +1977,8 @@ t7                                        opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
                                                - used (merged): (cnt=0, max=0)
                                                - input (half): (cnt=0, max=0)
                                                - input (full): (cnt=0, max=0)
-                                               - const (half): (cnt=0, max=0)
-                                               - const (full): (cnt=0, max=0)
+                                               - max const: 0
+
                                                - output (half): (cnt=0, max=0)  (estimated)
                                                - output (full): (cnt=0, max=0)  (estimated)
                                                - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
@@ -3498,8 +3498,8 @@ t4                                        write SP_FS_OBJ_START_LO (a983)
                                                        - used (merged): 0-147 (cnt=148, max=147)
                                                        - input (half): (cnt=0, max=0)
                                                        - input (full): 19-20 (cnt=2, max=20)
-                                                       - const (half): (cnt=0, max=0)
-                                                       - const (full): 0-1 3-5 8-9 32-113 (cnt=89, max=113)
+                                                       - max const: 113
+
                                                        - output (half): (cnt=0, max=0)  (estimated)
                                                        - output (full): 4-7 (cnt=4, max=7)  (estimated)
                                                        - shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full
@@ -4921,8 +4921,8 @@ t7                                        opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
                                                - used (merged): 0-147 (cnt=148, max=147)
                                                - input (half): (cnt=0, max=0)
                                                - input (full): 19-20 (cnt=2, max=20)
-                                               - const (half): (cnt=0, max=0)
-                                               - const (full): 0-1 3-5 8-9 32-113 (cnt=89, max=113)
+                                               - max const: 113
+
                                                - output (half): (cnt=0, max=0)  (estimated)
                                                - output (full): 4-7 (cnt=4, max=7)  (estimated)
                                                - shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full
@@ -5335,8 +5335,8 @@ t7                        opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
                                - used (merged): (cnt=0, max=0)
                                - input (half): (cnt=0, max=0)
                                - input (full): (cnt=0, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
@@ -6773,8 +6773,8 @@ t7                        opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
                                - used (merged): 0-147 (cnt=148, max=147)
                                - input (half): (cnt=0, max=0)
                                - input (full): 19-20 (cnt=2, max=20)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-1 3-5 8-9 32-113 (cnt=89, max=113)
+                               - max const: 113
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 4-7 (cnt=4, max=7)  (estimated)
                                - shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full
index 78930e4..2abe7e8 100644 (file)
@@ -427,11 +427,10 @@ t3                        opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): (cnt=0, max=0)
-                               - used (merged): (cnt=0, max=0)
                                - input (half): (cnt=0, max=0)
                                - input (full): (cnt=0, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
@@ -453,11 +452,10 @@ t3                        opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-3 (cnt=4, max=3)
-                               - used (merged): 0-7 (cnt=8, max=7)
                                - input (half): (cnt=0, max=0)
                                - input (full): (cnt=0, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-3 (cnt=4, max=3)
+                               - max const: 3
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 0-3 (cnt=4, max=3)  (estimated)
                                - shaderdb: 9 instructions, 8 nops, 1 non-nops, (9 instlen), 0 half, 1 full
@@ -1041,11 +1039,10 @@ t3                      opcode: CP_LOAD_STATE4 (30) (131 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-13 (cnt=14, max=13)
-                               - used (merged): 0-27 (cnt=28, max=27)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-5 (cnt=4, max=5)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-18 20-26 32-34 36-38 40-42 52 (cnt=36, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 6-13 (cnt=8, max=13)  (estimated)
                                - shaderdb: 74 instructions, 38 nops, 36 non-nops, (61 instlen), 0 half, 4 full
@@ -1082,14 +1079,13 @@ t3                      opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                :0:0010:0010[00000000x_00000000x] nop
                                Register Stats:
                                - used (half): (cnt=0, max=0)
-                               - used (full): 0-3 252 (cnt=5, max=3)
-                               - used (merged): 0-7 504-505 (cnt=10, max=7)
+                               - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
-                               - output (full): 252 (cnt=1, max=0)  (estimated)
+                               - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full
                                - shaderdb: 1 (ss), 0 (sy)
 109ce878:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
@@ -1673,11 +1669,10 @@ t3                      opcode: CP_LOAD_STATE4 (30) (131 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
-                               - used (merged): 0-17 20-35 (cnt=34, max=35)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
                                - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full
@@ -1713,14 +1708,13 @@ t3                      opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                :0:0010:0010[00000000x_00000000x] nop
                                Register Stats:
                                - used (half): (cnt=0, max=0)
-                               - used (full): 0-3 252 (cnt=5, max=3)
-                               - used (merged): 0-7 504-505 (cnt=10, max=7)
+                               - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
-                               - output (full): 252 (cnt=1, max=0)  (estimated)
+                               - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full
                                - shaderdb: 1 (ss), 0 (sy)
 109cf040:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
@@ -2106,11 +2100,10 @@ t3                      opcode: CP_LOAD_STATE4 (30) (131 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
-                               - used (merged): 0-17 20-35 (cnt=34, max=35)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
                                - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full
@@ -2145,11 +2138,10 @@ t3                      opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0 2-5 (cnt=5, max=5)
-                               - used (merged): 0-1 4-11 (cnt=10, max=11)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0 (cnt=1, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 2-5 (cnt=4, max=5)  (estimated)
                                - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full
@@ -2500,11 +2492,10 @@ t3                      opcode: CP_LOAD_STATE4 (30) (131 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-13 (cnt=14, max=13)
-                               - used (merged): 0-27 (cnt=28, max=27)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-5 (cnt=4, max=5)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-18 20-26 32-34 36-38 40-42 52 (cnt=36, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 6-13 (cnt=8, max=13)  (estimated)
                                - shaderdb: 74 instructions, 38 nops, 36 non-nops, (61 instlen), 0 half, 4 full
@@ -2541,14 +2532,13 @@ t3                      opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                :0:0010:0010[00000000x_00000000x] nop
                                Register Stats:
                                - used (half): (cnt=0, max=0)
-                               - used (full): 0-3 252 (cnt=5, max=3)
-                               - used (merged): 0-7 504-505 (cnt=10, max=7)
+                               - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
-                               - output (full): 252 (cnt=1, max=0)  (estimated)
+                               - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full
                                - shaderdb: 1 (ss), 0 (sy)
 109cfb78:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
@@ -3055,11 +3045,10 @@ t3                      opcode: CP_LOAD_STATE4 (30) (131 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
-                               - used (merged): 0-17 20-35 (cnt=34, max=35)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
                                - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full
@@ -3095,14 +3084,13 @@ t3                      opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                :0:0010:0010[00000000x_00000000x] nop
                                Register Stats:
                                - used (half): (cnt=0, max=0)
-                               - used (full): 0-3 252 (cnt=5, max=3)
-                               - used (merged): 0-7 504-505 (cnt=10, max=7)
+                               - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
-                               - output (full): 252 (cnt=1, max=0)  (estimated)
+                               - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full
                                - shaderdb: 1 (ss), 0 (sy)
 109d02c0:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
@@ -3488,11 +3476,10 @@ t3                      opcode: CP_LOAD_STATE4 (30) (131 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
-                               - used (merged): 0-17 20-35 (cnt=34, max=35)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
                                - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full
@@ -3527,11 +3514,10 @@ t3                      opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0 2-5 (cnt=5, max=5)
-                               - used (merged): 0-1 4-11 (cnt=10, max=11)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0 (cnt=1, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 2-5 (cnt=4, max=5)  (estimated)
                                - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full
@@ -3882,11 +3868,10 @@ t3                      opcode: CP_LOAD_STATE4 (30) (131 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-13 (cnt=14, max=13)
-                               - used (merged): 0-27 (cnt=28, max=27)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-5 (cnt=4, max=5)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-18 20-26 32-34 36-38 40-42 52 (cnt=36, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 6-13 (cnt=8, max=13)  (estimated)
                                - shaderdb: 74 instructions, 38 nops, 36 non-nops, (61 instlen), 0 half, 4 full
@@ -3923,14 +3908,13 @@ t3                      opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                :0:0010:0010[00000000x_00000000x] nop
                                Register Stats:
                                - used (half): (cnt=0, max=0)
-                               - used (full): 0-3 252 (cnt=5, max=3)
-                               - used (merged): 0-7 504-505 (cnt=10, max=7)
+                               - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
-                               - output (full): 252 (cnt=1, max=0)  (estimated)
+                               - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full
                                - shaderdb: 1 (ss), 0 (sy)
 109d0df8:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
@@ -4437,11 +4421,10 @@ t3                      opcode: CP_LOAD_STATE4 (30) (131 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
-                               - used (merged): 0-17 20-35 (cnt=34, max=35)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
                                - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full
@@ -4477,14 +4460,13 @@ t3                      opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                :0:0010:0010[00000000x_00000000x] nop
                                Register Stats:
                                - used (half): (cnt=0, max=0)
-                               - used (full): 0-3 252 (cnt=5, max=3)
-                               - used (merged): 0-7 504-505 (cnt=10, max=7)
+                               - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
-                               - output (full): 252 (cnt=1, max=0)  (estimated)
+                               - output (full): (cnt=0, max=0)  (estimated)
                                - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full
                                - shaderdb: 1 (ss), 0 (sy)
 109d1540:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
@@ -4870,11 +4852,10 @@ t3                      opcode: CP_LOAD_STATE4 (30) (131 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
-                               - used (merged): 0-17 20-35 (cnt=34, max=35)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): 0-22 28-30 32-34 36-38 52 (cnt=33, max=52)
+                               - max const: 52
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
                                - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full
@@ -4909,11 +4890,10 @@ t3                      opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                Register Stats:
                                - used (half): (cnt=0, max=0)
                                - used (full): 0 2-5 (cnt=5, max=5)
-                               - used (merged): 0-1 4-11 (cnt=10, max=11)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0 (cnt=1, max=0)
-                               - const (half): (cnt=0, max=0)
-                               - const (full): (cnt=0, max=0)
+                               - max const: 0
+
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 2-5 (cnt=4, max=5)  (estimated)
                                - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full
@@ -5214,11 +5194,10 @@ t3              opcode: CP_LOAD_STATE4 (30) (35 dwords)
                        Register Stats:
                        - used (half): (cnt=0, max=0)
                        - used (full): (cnt=0, max=0)
-                       - used (merged): (cnt=0, max=0)
                        - input (half): (cnt=0, max=0)
                        - input (full): (cnt=0, max=0)
-                       - const (half): (cnt=0, max=0)
-                       - const (full): (cnt=0, max=0)
+                       - max const: 0
+
                        - output (half): (cnt=0, max=0)  (estimated)
                        - output (full): (cnt=0, max=0)  (estimated)
                        - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full
@@ -5240,11 +5219,10 @@ t3              opcode: CP_LOAD_STATE4 (30) (35 dwords)
                        Register Stats:
                        - used (half): (cnt=0, max=0)
                        - used (full): 0-3 (cnt=4, max=3)
-                       - used (merged): 0-7 (cnt=8, max=7)
                        - input (half): (cnt=0, max=0)
                        - input (full): (cnt=0, max=0)
-                       - const (half): (cnt=0, max=0)
-                       - const (full): 0-3 (cnt=4, max=3)
+                       - max const: 3
+
                        - output (half): (cnt=0, max=0)  (estimated)
                        - output (full): 0-3 (cnt=4, max=3)  (estimated)
                        - shaderdb: 9 instructions, 8 nops, 1 non-nops, (9 instlen), 0 half, 1 full
similarity index 90%
rename from src/freedreno/decode/disasm.h
rename to src/freedreno/common/disasm.h
index de89ab4..1a2993e 100644 (file)
@@ -24,6 +24,8 @@
 #ifndef DISASM_H_
 #define DISASM_H_
 
+#include <stdbool.h>
+#include <stdint.h>
 #include <stdio.h>
 
 #include "compiler/shader_enums.h"
@@ -32,7 +34,8 @@
 enum debug_t {
        PRINT_RAW      = 0x1,    /* dump raw hexdump */
        PRINT_VERBOSE  = 0x2,
-       EXPAND_REPEAT  = 0x4,
+       PRINT_STATS    = 0x4,
+       EXPAND_REPEAT  = 0x8,
 };
 
 struct shader_stats {
@@ -47,6 +50,8 @@ int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage typ
 int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
 int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
                unsigned gpu_id, struct shader_stats *stats);
-void disasm_set_debug(enum debug_t debug);
+
+void disasm_a2xx_set_debug(enum debug_t debug);
+void disasm_a3xx_set_debug(enum debug_t debug);
 
 #endif /* DISASM_H_ */
index a79fe8e..32b3f08 100644 (file)
@@ -21,6 +21,7 @@
 libfreedreno_common = static_library(
   'freedreno_common',
   [
+    'disasm.h',
     'freedreno_uuid.c',
     'freedreno_uuid.h',
     'freedreno_guardband.h',
index 7fec7dc..a9ceacd 100644 (file)
@@ -139,6 +139,7 @@ static const struct option opts[] = {
 
 int main(int argc, char **argv)
 {
+       enum debug_t debug = PRINT_RAW | PRINT_STATS;
        int ret = -1;
        int start = 0, end = 0x7ffffff, draw = -1;
        int c;
@@ -153,7 +154,7 @@ int main(int argc, char **argv)
                        /* option that set a flag, nothing to do */
                        break;
                case 'v':
-                       disasm_set_debug(PRINT_RAW | EXPAND_REPEAT | PRINT_VERBOSE);
+                       debug |= (PRINT_RAW | EXPAND_REPEAT | PRINT_VERBOSE);
                        break;
                case 's':
                        options.summary = true;
@@ -192,6 +193,9 @@ int main(int argc, char **argv)
                }
        }
 
+       disasm_a2xx_set_debug(debug);
+       disasm_a3xx_set_debug(debug);
+
        if (interactive) {
                pager_open();
        }
index 3b17d83..8c5ccb5 100644 (file)
@@ -55,7 +55,7 @@
 #include "pager.h"
 #include "rnnutil.h"
 #include "util.h"
-#include "instr-a3xx.h"
+#include "ir3/instr-a3xx.h"
 
 
 static FILE *in;
@@ -223,7 +223,7 @@ void
 ir3_assert_handler(const char *expr, const char *file, int line,
                const char *func)
 {
-       printf("%s:%u: %s: Assertion `%s' failed.\n", file, line, func, expr);
+       printf("\n%s:%u: %s: Assertion `%s' failed.\n", file, line, func, expr);
        if (jmp_env_valid)
                longjmp(jmp_env, 1);
        abort();
@@ -1103,6 +1103,8 @@ main(int argc, char **argv)
                }
        }
 
+       disasm_a3xx_set_debug(PRINT_RAW);
+
        if (interactive) {
                pager_open();
        }
index 80b8a00..e6d7ba3 100644 (file)
@@ -49,7 +49,7 @@ static const char *levels[] = {
                "x",
 };
 
-enum debug_t debug;
+static enum debug_t debug;
 
 static struct rnn *rnn;
 
@@ -618,7 +618,7 @@ int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage typ
        return 0;
 }
 
-void disasm_set_debug(enum debug_t d)
+void disasm_a2xx_set_debug(enum debug_t d)
 {
        debug = d;
 }
diff --git a/src/freedreno/decode/disasm-a3xx.c b/src/freedreno/decode/disasm-a3xx.c
deleted file mode 100644 (file)
index 9645dc5..0000000
+++ /dev/null
@@ -1,1641 +0,0 @@
-/*
- * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <string.h>
-#include <assert.h>
-
-#include "disasm.h"
-#include "instr-a3xx.h"
-
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
-extern enum debug_t debug;
-
-static const char *levels[] = {
-               "",
-               "\t",
-               "\t\t",
-               "\t\t\t",
-               "\t\t\t\t",
-               "\t\t\t\t\t",
-               "\t\t\t\t\t\t",
-               "\t\t\t\t\t\t\t",
-               "\t\t\t\t\t\t\t\t",
-               "\t\t\t\t\t\t\t\t\t",
-               "x",
-               "x",
-               "x",
-               "x",
-               "x",
-               "x",
-};
-
-static const char *component = "xyzw";
-
-static const char *type[] = {
-               [TYPE_F16] = "f16",
-               [TYPE_F32] = "f32",
-               [TYPE_U16] = "u16",
-               [TYPE_U32] = "u32",
-               [TYPE_S16] = "s16",
-               [TYPE_S32] = "s32",
-               [TYPE_U8]  = "u8",
-               [TYPE_S8]  = "s8",
-};
-
-
-#define MAX_REG 4096
-
-typedef struct {
-       uint8_t full[MAX_REG/8];
-       uint8_t half[MAX_REG/8];
-} regmask_t;
-
-struct disasm_ctx {
-       FILE *out;
-       int level;
-       unsigned gpu_id;
-
-       struct shader_stats *stats;
-
-       /* we have to process the dst register after src to avoid tripping up
-        * the read-before-write detection
-        */
-       unsigned last_dst;
-       bool last_dst_full;
-       bool last_dst_valid;
-
-       /* current instruction repeat flag: */
-       unsigned repeat;
-       /* current instruction repeat indx/offset (for --expand): */
-       unsigned repeatidx;
-
-       /* tracking for register usage */
-       struct {
-               regmask_t used;
-               regmask_t used_merged;
-               regmask_t rbw;      /* read before write */
-               regmask_t war;      /* write after read */
-               regmask_t cnst;     /* used consts */
-       } regs;
-};
-
-static const char *float_imms[] = {
-       "0.0",
-       "0.5",
-       "1.0",
-       "2.0",
-       "e",
-       "pi",
-       "1/pi",
-       "1/log2(e)",
-       "log2(e)",
-       "1/log2(10)",
-       "log2(10)",
-       "4.0",
-};
-
-static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full,
-               bool is_float, bool r,
-               bool c, bool im, bool neg, bool abs, bool addr_rel)
-{
-       const char type = c ? 'c' : 'r';
-
-       // XXX I prefer - and || for neg/abs, but preserving format used
-       // by libllvm-a3xx for easy diffing..
-
-       if (abs && neg)
-               fprintf(ctx->out, "(absneg)");
-       else if (neg)
-               fprintf(ctx->out, "(neg)");
-       else if (abs)
-               fprintf(ctx->out, "(abs)");
-
-       if (r)
-               fprintf(ctx->out, "(r)");
-
-       if (im) {
-               if (is_float && full && reg.iim_val < ARRAY_SIZE(float_imms)) {
-                       fprintf(ctx->out, "(%s)", float_imms[reg.iim_val]);
-               } else {
-                       fprintf(ctx->out, "%d", reg.iim_val);
-               }
-       } else if (addr_rel) {
-               /* I would just use %+d but trying to make it diff'able with
-                * libllvm-a3xx...
-                */
-               if (reg.iim_val < 0)
-                       fprintf(ctx->out, "%s%c<a0.x - %d>", full ? "" : "h", type, -reg.iim_val);
-               else if (reg.iim_val > 0)
-                       fprintf(ctx->out, "%s%c<a0.x + %d>", full ? "" : "h", type, reg.iim_val);
-               else
-                       fprintf(ctx->out, "%s%c<a0.x>", full ? "" : "h", type);
-       } else if ((reg.num == REG_A0) && !c) {
-               /* This matches libllvm output, the second (scalar) address register
-                * seems to be called a1.x instead of a0.y.
-                */
-               fprintf(ctx->out, "a%d.x", reg.comp);
-       } else if ((reg.num == REG_P0) && !c) {
-               fprintf(ctx->out, "p0.%c", component[reg.comp]);
-       } else {
-               fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]);
-       }
-}
-
-/* Tracking for registers used, read-before-write (input), and
- * write-after-read (output.. but not 100%)..
- */
-
-static void regmask_set(regmask_t *regmask, unsigned num, bool full, unsigned val)
-{
-       unsigned i = num / 8;
-       unsigned j = num % 8;
-       ir3_assert(num < MAX_REG);
-       if (full) {
-               regmask->full[i] = (regmask->full[i] & ~(1 << j)) | (val << j);
-       } else {
-               regmask->half[i] = (regmask->half[i] & ~(1 << j)) | (val << j);
-       }
-}
-
-static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full)
-{
-       unsigned i = num / 8;
-       unsigned j = num % 8;
-       ir3_assert(num < MAX_REG);
-       if (full) {
-               return (regmask->full[i] >> j) & 0x1;
-       } else {
-               return (regmask->half[i] >> j) & 0x1;
-       }
-}
-
-static unsigned regidx(reg_t reg)
-{
-       return (4 * reg.num) + reg.comp;
-}
-
-static reg_t idxreg(unsigned idx)
-{
-       return (reg_t){
-               .comp = idx & 0x3,
-               .num  = idx >> 2,
-       };
-}
-
-static int print_regs(struct disasm_ctx *ctx, regmask_t *regmask, bool full)
-{
-       int num, max = 0, cnt = 0;
-       int first, last;
-
-       void print_sequence(void)
-       {
-               if (first != MAX_REG) {
-                       if (first == last) {
-                               fprintf(ctx->out, " %d", first);
-                       } else {
-                               fprintf(ctx->out, " %d-%d", first, last);
-                       }
-               }
-       }
-
-       first = last = MAX_REG;
-
-       for (num = 0; num < MAX_REG; num++) {
-               if (regmask_get(regmask, num, full)) {
-                       if (num != (last + 1)) {
-                               print_sequence();
-                               first = num;
-                       }
-                       last = num;
-                       if (num < (48*4))
-                               max = num;
-                       cnt++;
-               }
-       }
-
-       print_sequence();
-
-       fprintf(ctx->out, " (cnt=%d, max=%d)", cnt, max);
-
-       return max;
-}
-
-static void print_reg_stats(struct disasm_ctx *ctx)
-{
-       int fullreg, halfreg;
-
-       fprintf(ctx->out, "%sRegister Stats:\n", levels[ctx->level]);
-       fprintf(ctx->out, "%s- used (half):", levels[ctx->level]);
-       halfreg = print_regs(ctx, &ctx->regs.used, false);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- used (full):", levels[ctx->level]);
-       fullreg = print_regs(ctx, &ctx->regs.used, true);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- used (merged):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.used_merged, false);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- input (half):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.rbw, false);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- input (full):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.rbw, true);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- const (half):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.cnst, false);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- const (full):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.cnst, true);
-       fprintf(ctx->out, "\n");
-       fprintf(ctx->out, "%s- output (half):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.war, false);
-       fprintf(ctx->out, "  (estimated)\n");
-       fprintf(ctx->out, "%s- output (full):", levels[ctx->level]);
-       print_regs(ctx, &ctx->regs.war, true);
-       fprintf(ctx->out, "  (estimated)\n");
-
-       /* convert to vec4, which is the granularity that registers are
-        * assigned to shader:
-        */
-       fullreg = (fullreg + 3) / 4;
-       halfreg = (halfreg + 3) / 4;
-
-       // Note this count of instructions includes rptN, which matches
-       // up to how mesa prints this:
-       fprintf(ctx->out, "%s- shaderdb: %d instructions, %d nops, %d non-nops, "
-                       "(%d instlen), %d half, %d full\n",
-                       levels[ctx->level], ctx->stats->instructions, ctx->stats->nops,
-                       ctx->stats->instructions - ctx->stats->nops, ctx->stats->instlen,
-                       halfreg, fullreg);
-       fprintf(ctx->out, "%s- shaderdb: %d (ss), %d (sy)\n", levels[ctx->level],
-                       ctx->stats->ss, ctx->stats->sy);
-}
-
-static void process_reg_dst(struct disasm_ctx *ctx)
-{
-       int i;
-
-       if (!ctx->last_dst_valid)
-               return;
-
-       for (i = 0; i <= ctx->repeat; i++) {
-               unsigned dst = ctx->last_dst + i;
-
-               regmask_set(&ctx->regs.war, dst, ctx->last_dst_full, 1);
-               regmask_set(&ctx->regs.used, dst, ctx->last_dst_full, 1);
-
-               if (ctx->last_dst_full) {
-                       regmask_set(&ctx->regs.used_merged, (dst*2)+0, false, 1);
-                       regmask_set(&ctx->regs.used_merged, (dst*2)+1, false, 1);
-               } else {
-                       regmask_set(&ctx->regs.used_merged, dst, false, 1);
-               }
-       }
-
-       ctx->last_dst_valid = false;
-}
-
-static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel)
-{
-       /* presumably the special registers a0.c and p0.c don't count.. */
-       if (!(addr_rel || (reg.num == 61) || (reg.num == 62))) {
-               ctx->last_dst = regidx(reg);
-               ctx->last_dst_full = full;
-               ctx->last_dst_valid = true;
-       }
-       reg = idxreg(regidx(reg) + ctx->repeatidx);
-       print_reg(ctx, reg, full, false, false, false, false, false, false, addr_rel);
-}
-
-static void print_reg_src(struct disasm_ctx *ctx, reg_t reg, bool full, bool f, bool r,
-               bool c, bool im, bool neg, bool abs, bool addr_rel)
-{
-       /* presumably the special registers a0.c and p0.c don't count.. */
-       if (!(addr_rel || c || im || (reg.num == 61) || (reg.num == 62))) {
-               int i, num = regidx(reg);
-               for (i = 0; i <= ctx->repeat; i++) {
-                       unsigned src = num + i;
-
-                       if (!regmask_get(&ctx->regs.used, src, full))
-                               regmask_set(&ctx->regs.rbw, src, full, 1);
-
-                       regmask_set(&ctx->regs.war, src, full, 0);
-                       regmask_set(&ctx->regs.used, src, full, 1);
-
-                       if (full) {
-                               regmask_set(&ctx->regs.used_merged, (src*2)+0, false, 1);
-                               regmask_set(&ctx->regs.used_merged, (src*2)+1, false, 1);
-                       } else {
-                               regmask_set(&ctx->regs.used_merged, src, false, 1);
-                       }
-
-                       if (!r)
-                               break;
-               }
-       } else if (c) {
-               int i, num = regidx(reg);
-               for (i = 0; i <= ctx->repeat; i++) {
-                       unsigned src = num + i;
-
-                       regmask_set(&ctx->regs.cnst, src, full, 1);
-
-                       if (!r)
-                               break;
-               }
-
-               unsigned max = (num + ctx->repeat + 1 + 3) / 4;
-               if (max > ctx->stats->constlen)
-                       ctx->stats->constlen = max;
-       }
-
-       if (r)
-               reg = idxreg(regidx(reg) + ctx->repeatidx);
-
-       print_reg(ctx, reg, full, f, r, c, im, neg, abs, addr_rel);
-}
-
-/* TODO switch to using reginfo struct everywhere, since more readable
- * than passing a bunch of bools to print_reg_src
- */
-
-/*
- * Description of one operand as handed to print_src(): the register plus
- * the flags that print_reg_src() otherwise takes as separate bools.
- */
-struct reginfo {
-       reg_t reg;
-       bool full;      /* set from "type_size(...) == 32" by callers, ie. not a half reg */
-       bool r;         /* (r) flag: operand advances with the repeat index */
-       bool c;         /* operand is tracked in the const mask rather than as a GPR */
-       bool f; /* src reg is interpreted as float, used for printing immediates */
-       bool im;        /* presumably: operand is an immediate, not a register */
-       bool neg;       /* presumably: negate modifier */
-       bool abs;       /* presumably: absolute-value modifier */
-       bool addr_rel;  /* set by callers when the *_rel encoding is used */
-};
-
-/*
- * Print a source operand described by a reginfo.
- *
- * The register is passed to print_reg_src() unmodified.  For (r) sources
- * print_reg_src() adds ctx->repeatidx itself before printing, and its
- * usage-tracking loops walk num..num+ctx->repeat starting from the
- * register it is given, so pre-offsetting here as well would apply the
- * repeat index twice.
- */
-static void print_src(struct disasm_ctx *ctx, struct reginfo *info)
-{
-       print_reg_src(ctx, info->reg, info->full, info->f, info->r, info->c,
-                       info->im, info->neg, info->abs, info->addr_rel);
-}
-
-//static void print_dst(struct disasm_ctx *ctx, struct reginfo *info)
-//{
-//     print_reg_dst(ctx, info->reg, info->full, info->addr_rel);
-//}
-
-/*
- * Print the operands of a category 0 instruction.
- *
- * kill/predt/predf print a single (optionally inverted) p0 component.
- * Branches print a type suffix, an optional index, up to two p0
- * conditions and the immediate.  jump/call/bkt/getone/shps print only
- * the immediate; every other opcode has no operands.
- */
-static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr)
-{
-       static const struct {
-               const char *suffix;
-               int nsrc;
-               bool idx;
-       } brinfo[7] = {
-               [BRANCH_PLAIN] = { "r",   1, false },
-               [BRANCH_OR]    = { "rao", 2, false },
-               [BRANCH_AND]   = { "raa", 2, false },
-               [BRANCH_CONST] = { "rac", 0, true  },
-               [BRANCH_ANY]   = { "any", 1, false },
-               [BRANCH_ALL]   = { "all", 1, false },
-               [BRANCH_X]     = { "rax", 0, false },
-       };
-       instr_cat0_t *cat0 = &instr->cat0;
-
-       switch (instr_opc(instr, ctx->gpu_id)) {
-       case OPC_KILL:
-       case OPC_PREDT:
-       case OPC_PREDF:
-               fprintf(ctx->out, " %sp0.%c", cat0->inv0 ? "!" : "",
-                               component[cat0->comp0]);
-               break;
-       case OPC_B: {
-               const unsigned brtype = cat0->brtype;
-
-               /* The instruction words may be arbitrary memory, so the
-                * branch type is not guaranteed to be one of the encodings
-                * in the table; never index past its end.
-                */
-               if (brtype >= sizeof(brinfo) / sizeof(brinfo[0])) {
-                       fprintf(ctx->out, "?%u? #%d", brtype, cat0->a3xx.immed);
-                       break;
-               }
-
-               fprintf(ctx->out, "%s", brinfo[brtype].suffix);
-               if (brinfo[brtype].idx) {
-                       fprintf(ctx->out, ".%u", cat0->idx);
-               }
-               if (brinfo[brtype].nsrc >= 1) {
-                       fprintf(ctx->out, " %sp0.%c,", cat0->inv0 ? "!" : "",
-                                       component[cat0->comp0]);
-               }
-               if (brinfo[brtype].nsrc >= 2) {
-                       fprintf(ctx->out, " %sp0.%c,", cat0->inv1 ? "!" : "",
-                                       component[cat0->comp1]);
-               }
-               fprintf(ctx->out, " #%d", cat0->a3xx.immed);
-               break;
-       }
-       case OPC_JUMP:
-       case OPC_CALL:
-       case OPC_BKT:
-       case OPC_GETONE:
-       case OPC_SHPS:
-               fprintf(ctx->out, " #%d", cat0->a3xx.immed);
-               break;
-       default:
-               /* no operands to print */
-               break;
-       }
-
-       /* in verbose mode, show the otherwise ignored bits when non-zero */
-       if ((debug & PRINT_VERBOSE) && (cat0->dummy3|cat0->dummy4))
-               fprintf(ctx->out, "\t{0: %x,%x}", cat0->dummy3, cat0->dummy4);
-}
-
-/*
- * Print a category 1 instruction: the mnemonic (mov/cov with src and dst
- * type suffixes, or "mova"), the destination, and the source, which is an
- * immediate, an a0.x-relative reference, or a plain register/const.
- */
-static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat1_t *cat1 = &instr->cat1;
-
-       if (cat1->ul)
-               fprintf(ctx->out, "(ul)");
-
-       if (cat1->src_type != cat1->dst_type) {
-               /* differing types: a conversion */
-               fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
-       } else if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) {
-               /* s16 move into the address register has its own mnemonic */
-               fprintf(ctx->out, "mova");
-       } else {
-               fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
-       }
-
-       fprintf(ctx->out, " ");
-
-       if (cat1->even)
-               fprintf(ctx->out, "(even)");
-
-       if (cat1->pos_inf)
-               fprintf(ctx->out, "(pos_infinity)");
-
-       print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32,
-                       cat1->dst_rel);
-
-       fprintf(ctx->out, ", ");
-
-       if (cat1->src_im) {
-               /* immediates are formatted here according to the source type
-                * rather than going through print_reg()
-                */
-               if (type_float(cat1->src_type))
-                       fprintf(ctx->out, "(%f)", cat1->fim_val);
-               else if (type_uint(cat1->src_type))
-                       fprintf(ctx->out, "0x%08x", cat1->uim_val);
-               else
-                       fprintf(ctx->out, "%d", cat1->iim_val);
-       } else if (!cat1->src_rel || cat1->src_c) {
-               struct reginfo operand = {
-                       .reg = (reg_t)cat1->src,
-                       .full = type_size(cat1->src_type) == 32,
-                       .r = cat1->src_r,
-                       .c = cat1->src_c,
-                       .im = cat1->src_im,
-               };
-               print_src(ctx, &operand);
-       } else {
-               /* a0.x-relative source; the offset sign is spelled out by hand
-                * (instead of %+d) to stay diff'able with libllvm-a3xx
-                */
-               const char file = cat1->src_rel_c ? 'c' : 'r';
-               const char *half = (type_size(cat1->src_type) == 32) ? "" : "h";
-
-               if (cat1->off == 0)
-                       fprintf(ctx->out, "%s%c<a0.x>", half, file);
-               else if (cat1->off > 0)
-                       fprintf(ctx->out, "%s%c<a0.x + %d>", half, file, cat1->off);
-               else
-                       fprintf(ctx->out, "%s%c<a0.x - %d>", half, file, -cat1->off);
-       }
-
-       if ((debug & PRINT_VERBOSE) && (cat1->must_be_0))
-               fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0);
-}
-
-/*
- * Print the operands of a category 2 instruction: an optional condition
- * suffix for the compare opcodes, the destination, and one or two
- * sources depending on the opcode.
- */
-static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat2_t *cat2 = &instr->cat2;
-       int opc = _OPC(2, cat2->opc);
-       static const char *cond[] = {
-                       "lt",
-                       "le",
-                       "gt",
-                       "ge",
-                       "eq",
-                       "ne",
-                       "?6?",
-       };
-       const unsigned ncond = sizeof(cond) / sizeof(cond[0]);
-
-       switch (opc) {
-       case OPC_CMPS_F:
-       case OPC_CMPS_U:
-       case OPC_CMPS_S:
-       case OPC_CMPV_F:
-       case OPC_CMPV_U:
-       case OPC_CMPV_S:
-               /* The table does not cover every value the field can hold;
-                * print out-of-table values numerically, in the same style
-                * as the "?6?" placeholder, rather than reading past the end.
-                */
-               if ((unsigned)cat2->cond < ncond)
-                       fprintf(ctx->out, ".%s", cond[cat2->cond]);
-               else
-                       fprintf(ctx->out, ".?%u?", (unsigned)cat2->cond);
-               break;
-       default:
-               /* no condition suffix */
-               break;
-       }
-
-       fprintf(ctx->out, " ");
-       if (cat2->ei)
-               fprintf(ctx->out, "(ei)");
-       print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false);
-       fprintf(ctx->out, ", ");
-
-       /* the (r) flag is only honoured when the instruction has a repeat count */
-       struct reginfo src1 = {
-               .full = cat2->full,
-               .r = cat2->repeat ? cat2->src1_r : 0,
-               .f = is_cat2_float(opc),
-               .im = cat2->src1_im,
-               .abs = cat2->src1_abs,
-               .neg = cat2->src1_neg,
-       };
-
-       /* the src1 field has three encodings: const, relative, plain */
-       if (cat2->c1.src1_c) {
-               src1.reg = (reg_t)(cat2->c1.src1);
-               src1.c = true;
-       } else if (cat2->rel1.src1_rel) {
-               src1.reg = (reg_t)(cat2->rel1.src1);
-               src1.c = cat2->rel1.src1_c;
-               src1.addr_rel = true;
-       } else {
-               src1.reg = (reg_t)(cat2->src1);
-       }
-       print_src(ctx, &src1);
-
-       struct reginfo src2 = {
-               .r = cat2->repeat ? cat2->src2_r : 0,
-               .full = cat2->full,
-               .f = is_cat2_float(opc),
-               .abs = cat2->src2_abs,
-               .neg = cat2->src2_neg,
-               .im = cat2->src2_im,
-       };
-       switch (opc) {
-       case OPC_ABSNEG_F:
-       case OPC_ABSNEG_S:
-       case OPC_CLZ_B:
-       case OPC_CLZ_S:
-       case OPC_SIGN_F:
-       case OPC_FLOOR_F:
-       case OPC_CEIL_F:
-       case OPC_RNDNE_F:
-       case OPC_RNDAZ_F:
-       case OPC_TRUNC_F:
-       case OPC_NOT_B:
-       case OPC_BFREV_B:
-       case OPC_SETRM:
-       case OPC_CBITS_B:
-               /* these only have one src reg */
-               break;
-       default:
-               fprintf(ctx->out, ", ");
-               if (cat2->c2.src2_c) {
-                       src2.reg = (reg_t)(cat2->c2.src2);
-                       src2.c = true;
-               } else if (cat2->rel2.src2_rel) {
-                       src2.reg = (reg_t)(cat2->rel2.src2);
-                       src2.c = cat2->rel2.src2_c;
-                       src2.addr_rel = true;
-               } else {
-                       src2.reg = (reg_t)(cat2->src2);
-               }
-               print_src(ctx, &src2);
-               break;
-       }
-}
-
-/*
- * Print the operands of a category 3 instruction: destination followed
- * by three sources.  src1 and src3 each have const / relative / plain
- * encodings; src2 has a single encoding with a const flag.
- */
-static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat3_t *cat3 = &instr->cat3;
-       const bool full = instr_cat3_full(cat3);
-
-       /* destination */
-       fprintf(ctx->out, " ");
-       print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false);
-
-       /* first source */
-       fprintf(ctx->out, ", ");
-       struct reginfo first = {
-               .full = full,
-               .neg = cat3->src1_neg,
-               .r = cat3->repeat ? cat3->src1_r : 0,
-       };
-       if (cat3->c1.src1_c) {
-               first.c = true;
-               first.reg = (reg_t)(cat3->c1.src1);
-       } else if (!cat3->rel1.src1_rel) {
-               first.reg = (reg_t)(cat3->src1);
-       } else {
-               first.addr_rel = true;
-               first.c = cat3->rel1.src1_c;
-               first.reg = (reg_t)(cat3->rel1.src1);
-       }
-       print_src(ctx, &first);
-
-       /* second source */
-       fprintf(ctx->out, ", ");
-       struct reginfo second = {
-               .full = full,
-               .neg = cat3->src2_neg,
-               .c = cat3->src2_c,
-               .r = cat3->repeat ? cat3->src2_r : 0,
-               .reg = (reg_t)cat3->src2,
-       };
-       print_src(ctx, &second);
-
-       /* third source; its (r) flag is taken as-is, without looking at
-        * the repeat count
-        */
-       fprintf(ctx->out, ", ");
-       struct reginfo third = {
-               .full = full,
-               .neg = cat3->src3_neg,
-               .r = cat3->src3_r,
-       };
-       if (cat3->c2.src3_c) {
-               third.c = true;
-               third.reg = (reg_t)(cat3->c2.src3);
-       } else if (!cat3->rel2.src3_rel) {
-               third.reg = (reg_t)(cat3->src3);
-       } else {
-               third.addr_rel = true;
-               third.c = cat3->rel2.src3_c;
-               third.reg = (reg_t)(cat3->rel2.src3);
-       }
-       print_src(ctx, &third);
-}
-
-/*
- * Print the operands of a category 4 instruction: destination and a
- * single source, which uses the const, relative or plain encoding.
- */
-static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat4_t *cat4 = &instr->cat4;
-
-       fprintf(ctx->out, " ");
-       print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false);
-       fprintf(ctx->out, ", ");
-
-       struct reginfo operand = {
-               .full = cat4->full,
-               .r = cat4->src_r,
-               .im = cat4->src_im,
-               .abs = cat4->src_abs,
-               .neg = cat4->src_neg,
-       };
-
-       if (cat4->c.src_c) {
-               operand.c = true;
-               operand.reg = (reg_t)(cat4->c.src);
-       } else if (!cat4->rel.src_rel) {
-               operand.reg = (reg_t)(cat4->src);
-       } else {
-               operand.addr_rel = true;
-               operand.c = cat4->rel.src_c;
-               operand.reg = (reg_t)(cat4->rel.src);
-       }
-       print_src(ctx, &operand);
-
-       /* in verbose mode, show the otherwise ignored bits when non-zero */
-       if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2))
-               fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2);
-}
-
-/*
- * Print the operands of a category 5 instruction.
- *
- * Which of src1/src2/sampler/texture an opcode takes comes from the
- * info[] table; how the sampler/texture descriptor is supplied
- * (immediate, indirect register, bindless, via a1.x) comes from
- * desc_features[], which is only consulted when is_s2en_bindless is set.
- */
-static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr)
-{
-       static const struct {
-               bool src1, src2, samp, tex;
-       } info[0x1f] = {
-                       [opc_op(OPC_ISAM)]     = { true,  false, true,  true,  },
-                       [opc_op(OPC_ISAML)]    = { true,  true,  true,  true,  },
-                       [opc_op(OPC_ISAMM)]    = { true,  false, true,  true,  },
-                       [opc_op(OPC_SAM)]      = { true,  false, true,  true,  },
-                       [opc_op(OPC_SAMB)]     = { true,  true,  true,  true,  },
-                       [opc_op(OPC_SAML)]     = { true,  true,  true,  true,  },
-                       [opc_op(OPC_SAMGQ)]    = { true,  false, true,  true,  },
-                       [opc_op(OPC_GETLOD)]   = { true,  false, true,  true,  },
-                       [opc_op(OPC_CONV)]     = { true,  true,  true,  true,  },
-                       [opc_op(OPC_CONVM)]    = { true,  true,  true,  true,  },
-                       [opc_op(OPC_GETSIZE)]  = { true,  false, false, true,  },
-                       [opc_op(OPC_GETBUF)]   = { false, false, false, true,  },
-                       [opc_op(OPC_GETPOS)]   = { true,  false, false, true,  },
-                       [opc_op(OPC_GETINFO)]  = { false, false, false, true,  },
-                       [opc_op(OPC_DSX)]      = { true,  false, false, false, },
-                       [opc_op(OPC_DSY)]      = { true,  false, false, false, },
-                       [opc_op(OPC_GATHER4R)] = { true,  false, true,  true,  },
-                       [opc_op(OPC_GATHER4G)] = { true,  false, true,  true,  },
-                       [opc_op(OPC_GATHER4B)] = { true,  false, true,  true,  },
-                       [opc_op(OPC_GATHER4A)] = { true,  false, true,  true,  },
-                       [opc_op(OPC_SAMGP0)]   = { true,  false, true,  true,  },
-                       [opc_op(OPC_SAMGP1)]   = { true,  false, true,  true,  },
-                       [opc_op(OPC_SAMGP2)]   = { true,  false, true,  true,  },
-                       [opc_op(OPC_SAMGP3)]   = { true,  false, true,  true,  },
-                       [opc_op(OPC_DSXPP_1)]  = { true,  false, false, false, },
-                       [opc_op(OPC_DSYPP_1)]  = { true,  false, false, false, },
-                       [opc_op(OPC_RGETPOS)]  = { true,  false, false, false, },
-                       [opc_op(OPC_RGETINFO)] = { false, false, false, false, },
-       };
-
-       static const struct {
-               bool indirect;
-               bool bindless;
-               bool use_a1;
-               bool uniform;
-       } desc_features[8] = {
-               [CAT5_NONUNIFORM] = { .indirect = true, },
-               [CAT5_UNIFORM] = { .indirect = true, .uniform = true, },
-               [CAT5_BINDLESS_IMM] = { .bindless = true, },
-               [CAT5_BINDLESS_UNIFORM] = {
-                       .bindless = true,
-                       .indirect = true,
-                       .uniform = true,
-               },
-               [CAT5_BINDLESS_NONUNIFORM] = {
-                       .bindless = true,
-                       .indirect = true,
-               },
-               [CAT5_BINDLESS_A1_IMM] = {
-                       .bindless = true,
-                       .use_a1 = true,
-               },
-               [CAT5_BINDLESS_A1_UNIFORM] = {
-                       .bindless = true,
-                       .indirect = true,
-                       .uniform = true,
-                       .use_a1 = true,
-               },
-               [CAT5_BINDLESS_A1_NONUNIFORM] = {
-                       .bindless = true,
-                       .indirect = true,
-                       .use_a1 = true,
-               },
-       };
-
-       instr_cat5_t *cat5 = &instr->cat5;
-       int i;
-
-       /* info[] has 0x1f entries, so an opcode value of 0x1f would index
-        * one past its end.  Look the entry up once, bounds-checked, and
-        * treat an out-of-table opcode like an entry with no operands.
-        */
-       bool has_src1 = false, has_src2 = false, has_samp = false, has_tex = false;
-       if ((unsigned)cat5->opc < sizeof(info) / sizeof(info[0])) {
-               has_src1 = info[cat5->opc].src1;
-               has_src2 = info[cat5->opc].src2;
-               has_samp = info[cat5->opc].samp;
-               has_tex  = info[cat5->opc].tex;
-       }
-
-       bool desc_indirect =
-               cat5->is_s2en_bindless &&
-               desc_features[cat5->s2en_bindless.desc_mode].indirect;
-       bool bindless =
-               cat5->is_s2en_bindless &&
-               desc_features[cat5->s2en_bindless.desc_mode].bindless;
-       bool use_a1 =
-               cat5->is_s2en_bindless &&
-               desc_features[cat5->s2en_bindless.desc_mode].use_a1;
-       bool uniform =
-               cat5->is_s2en_bindless &&
-               desc_features[cat5->s2en_bindless.desc_mode].uniform;
-
-       /* mnemonic suffixes */
-       if (cat5->is_3d)   fprintf(ctx->out, ".3d");
-       if (cat5->is_a)    fprintf(ctx->out, ".a");
-       if (cat5->is_o)    fprintf(ctx->out, ".o");
-       if (cat5->is_p)    fprintf(ctx->out, ".p");
-       if (cat5->is_s)    fprintf(ctx->out, ".s");
-       if (desc_indirect) fprintf(ctx->out, ".s2en");
-       if (uniform)       fprintf(ctx->out, ".uniform");
-
-       if (bindless) {
-               /* the base is split across two fields */
-               unsigned base = (cat5->s2en_bindless.base_hi << 1) | cat5->base_lo;
-               fprintf(ctx->out, ".base%u", base);
-       }
-
-       fprintf(ctx->out, " ");
-
-       switch (_OPC(5, cat5->opc)) {
-       case OPC_DSXPP_1:
-       case OPC_DSYPP_1:
-               /* no type annotation for these */
-               break;
-       default:
-               fprintf(ctx->out, "(%s)", type[cat5->type]);
-               break;
-       }
-
-       /* write mask */
-       fprintf(ctx->out, "(");
-       for (i = 0; i < 4; i++)
-               if (cat5->wrmask & (1 << i))
-                       fprintf(ctx->out, "%c", "xyzw"[i]);
-       fprintf(ctx->out, ")");
-
-       print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false);
-
-       if (has_src1) {
-               fprintf(ctx->out, ", ");
-               struct reginfo src = { .reg = (reg_t)(cat5->src1), .full = cat5->full };
-               print_src(ctx, &src);
-       }
-
-       /* the .o variant always carries a second source */
-       if (cat5->is_o || has_src2) {
-               fprintf(ctx->out, ", ");
-               struct reginfo src = { .reg = (reg_t)(cat5->src2), .full = cat5->full };
-               print_src(ctx, &src);
-       }
-       if (cat5->is_s2en_bindless) {
-               if (!desc_indirect) {
-                       /* immediate descriptor: src3 holds the sampler in its
-                        * low 4 bits and the texture above them, unless a1.x
-                        * is used, in which case all of src3 is the sampler
-                        */
-                       if (has_samp) {
-                               if (use_a1)
-                                       fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3);
-                               else
-                                       fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3 & 0xf);
-                       }
-
-                       if (has_tex && !use_a1) {
-                               fprintf(ctx->out, ", t#%d", cat5->s2en_bindless.src3 >> 4);
-                       }
-               }
-       } else {
-               if (has_samp)
-                       fprintf(ctx->out, ", s#%d", cat5->norm.samp);
-               if (has_tex)
-                       fprintf(ctx->out, ", t#%d", cat5->norm.tex);
-       }
-
-       if (desc_indirect) {
-               /* descriptor comes from a register */
-               fprintf(ctx->out, ", ");
-               struct reginfo src = { .reg = (reg_t)(cat5->s2en_bindless.src3), .full = bindless };
-               print_src(ctx, &src);
-       }
-
-       if (use_a1)
-               fprintf(ctx->out, ", a1.x");
-
-       /* in verbose mode, show the otherwise ignored bits when non-zero */
-       if (debug & PRINT_VERBOSE) {
-               if (cat5->is_s2en_bindless) {
-                       if (cat5->s2en_bindless.dummy1)
-                               fprintf(ctx->out, "\t{5: %x}", cat5->s2en_bindless.dummy1);
-               } else {
-                       if (cat5->norm.dummy1)
-                               fprintf(ctx->out, "\t{5: %x}", cat5->norm.dummy1);
-               }
-       }
-}
-
-/*
- * Print the operands of a category 6 instruction in the legacy encoding
- * (print_instr_cat6() calls this when is_cat6_legacy() is true).
- *
- * Several opcode groups have their own operand layout and return early:
- * STGB/STIB, the atomics, RESINFO, LDGB, and LDG with both a.src1_im and
- * a.src2_im set.  Everything else falls through to the generic form at
- * the bottom: an optional destination (wrapped in "<sd>[...]" when it is
- * an address), then src1 (wrapped in "<ss>[...]"), then src2.
- */
-static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat6_t *cat6 = &instr->cat6;
-       char sd = 0, ss = 0;  /* dst/src address space */
-       bool nodst = false;
-       struct reginfo dst, src1, src2;
-       int src1off = 0, dstoff = 0;
-
-       memset(&dst, 0, sizeof(dst));
-       memset(&src1, 0, sizeof(src1));
-       memset(&src2, 0, sizeof(src2));
-
-       /* choose full vs half register for dst/src1/src2 per opcode */
-       switch (_OPC(6, cat6->opc)) {
-       case OPC_RESINFO:
-       case OPC_RESFMT:
-               dst.full  = type_size(cat6->type) == 32;
-               src1.full = type_size(cat6->type) == 32;
-               src2.full = type_size(cat6->type) == 32;
-               break;
-       case OPC_L2G:
-       case OPC_G2L:
-               dst.full = true;
-               src1.full = true;
-               src2.full = true;
-               break;
-       case OPC_STG:
-       case OPC_STL:
-       case OPC_STP:
-       case OPC_STLW:
-       case OPC_STIB:
-               dst.full  = type_size(cat6->type) == 32;
-               src1.full = type_size(cat6->type) == 32;
-               src2.full = type_size(cat6->type) == 32;
-               break;
-       default:
-               dst.full  = type_size(cat6->type) == 32;
-               src1.full = true;
-               src2.full = true;
-               break;
-       }
-
-       /* opcode-specific suffixes printed right after the mnemonic */
-       switch (_OPC(6, cat6->opc)) {
-       case OPC_PREFETCH:
-               break;
-       case OPC_RESINFO:
-               fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
-               break;
-       case OPC_LDGB:
-               fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped");
-               fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
-               fprintf(ctx->out, ".%s", type[cat6->type]);
-               fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1);
-               break;
-       case OPC_STGB:
-       case OPC_STIB:
-               fprintf(ctx->out, ".%s", cat6->stgb.typed ? "typed" : "untyped");
-               fprintf(ctx->out, ".%dd", cat6->stgb.d + 1);
-               fprintf(ctx->out, ".%s", type[cat6->type]);
-               fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1);
-               break;
-       case OPC_ATOMIC_ADD:
-       case OPC_ATOMIC_SUB:
-       case OPC_ATOMIC_XCHG:
-       case OPC_ATOMIC_INC:
-       case OPC_ATOMIC_DEC:
-       case OPC_ATOMIC_CMPXCHG:
-       case OPC_ATOMIC_MIN:
-       case OPC_ATOMIC_MAX:
-       case OPC_ATOMIC_AND:
-       case OPC_ATOMIC_OR:
-       case OPC_ATOMIC_XOR:
-               /* for atomics the address space comes from the g bit */
-               ss = cat6->g ? 'g' : 'l';
-               fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped");
-               fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
-               fprintf(ctx->out, ".%s", type[cat6->type]);
-               fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1);
-               fprintf(ctx->out, ".%c", ss);
-               break;
-       default:
-               dst.im = cat6->g && !cat6->dst_off;
-               fprintf(ctx->out, ".%s", type[cat6->type]);
-               break;
-       }
-       fprintf(ctx->out, " ");
-
-       /* address-space letters used to wrap dst (sd) and src1 (ss) below */
-       switch (_OPC(6, cat6->opc)) {
-       case OPC_STG:
-               sd = 'g';
-               break;
-       case OPC_STP:
-               sd = 'p';
-               break;
-       case OPC_STL:
-       case OPC_STLW:
-               sd = 'l';
-               break;
-
-       case OPC_LDG:
-       case OPC_LDC:
-               ss = 'g';
-               break;
-       case OPC_LDP:
-               ss = 'p';
-               break;
-       case OPC_LDL:
-       case OPC_LDLW:
-       case OPC_LDLV:
-               ss = 'l';
-               break;
-
-       case OPC_L2G:
-               ss = 'l';
-               sd = 'g';
-               break;
-
-       case OPC_G2L:
-               ss = 'g';
-               sd = 'l';
-               break;
-
-       case OPC_PREFETCH:
-               ss = 'g';
-               nodst = true;
-               break;
-       }
-
-       /* STGB/STIB: "g[ssbo], src1, src2, src3" -- no register destination */
-       if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) {
-               struct reginfo src3;
-
-               memset(&src3, 0, sizeof(src3));
-
-               src1.reg = (reg_t)(cat6->stgb.src1);
-               src2.reg = (reg_t)(cat6->stgb.src2);
-               src2.im  = cat6->stgb.src2_im;
-               src3.reg = (reg_t)(cat6->stgb.src3);
-               src3.im  = cat6->stgb.src3_im;
-               src3.full = true;
-
-               fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo);
-               print_src(ctx, &src1);
-               fprintf(ctx->out, ", ");
-               print_src(ctx, &src2);
-               fprintf(ctx->out, ", ");
-               print_src(ctx, &src3);
-
-               if (debug & PRINT_VERBOSE)
-                       fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3);
-
-               return;
-       }
-
-       if (is_atomic(_OPC(6, cat6->opc))) {
-
-               src1.reg = (reg_t)(cat6->ldgb.src1);
-               src1.im  = cat6->ldgb.src1_im;
-               src2.reg = (reg_t)(cat6->ldgb.src2);
-               src2.im  = cat6->ldgb.src2_im;
-               dst.reg  = (reg_t)(cat6->ldgb.dst);
-
-               /* the destination is printed through print_src() here and below */
-               print_src(ctx, &dst);
-               fprintf(ctx->out, ", ");
-               if (ss == 'g') {
-                       struct reginfo src3;
-                       memset(&src3, 0, sizeof(src3));
-
-                       src3.reg = (reg_t)(cat6->ldgb.src3);
-                       src3.full = true;
-
-                       /* For images, the ".typed" variant is used and src2 is
-                        * the ivecN coordinates, ie ivec2 for 2d.
-                        *
-                        * For SSBOs, the ".untyped" variant is used and src2 is
-                        * a simple dword offset..  src3 appears to be
-                        * uvec2(offset * 4, 0).  Not sure the point of that.
-                        */
-
-                       fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo);
-                       print_src(ctx, &src1);  /* value */
-                       fprintf(ctx->out, ", ");
-                       print_src(ctx, &src2);  /* offset/coords */
-                       fprintf(ctx->out, ", ");
-                       print_src(ctx, &src3);  /* 64b byte offset.. */
-
-                       if (debug & PRINT_VERBOSE) {
-                               fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0,
-                                               cat6->ldgb.pad3, cat6->ldgb.mustbe0);
-                       }
-               } else { /* ss == 'l' */
-                       fprintf(ctx->out, "l[");
-                       print_src(ctx, &src1);  /* simple byte offset */
-                       fprintf(ctx->out, "], ");
-                       print_src(ctx, &src2);  /* value */
-
-                       if (debug & PRINT_VERBOSE) {
-                               fprintf(ctx->out, " (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)",
-                                               cat6->ldgb.src3, cat6->ldgb.pad0,
-                                               cat6->ldgb.pad3, cat6->ldgb.mustbe0);
-                       }
-               }
-
-               return;
-       } else if (_OPC(6, cat6->opc) == OPC_RESINFO) {
-               dst.reg  = (reg_t)(cat6->ldgb.dst);
-
-               print_src(ctx, &dst);
-               fprintf(ctx->out, ", ");
-               fprintf(ctx->out, "g[%u]", cat6->ldgb.src_ssbo);
-
-               return;
-       } else if (_OPC(6, cat6->opc) == OPC_LDGB) {
-
-               src1.reg = (reg_t)(cat6->ldgb.src1);
-               src1.im  = cat6->ldgb.src1_im;
-               src2.reg = (reg_t)(cat6->ldgb.src2);
-               src2.im  = cat6->ldgb.src2_im;
-               dst.reg  = (reg_t)(cat6->ldgb.dst);
-
-               print_src(ctx, &dst);
-               fprintf(ctx->out, ", ");
-               fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo);
-               print_src(ctx, &src1);
-               fprintf(ctx->out, ", ");
-               print_src(ctx, &src2);
-
-               if (debug & PRINT_VERBOSE)
-                       fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0);
-
-               return;
-       } else if (_OPC(6, cat6->opc) == OPC_LDG && cat6->a.src1_im && cat6->a.src2_im) {
-               /* ldg with both immediate flags set: "dst, g[src1+off], src2",
-                * where src1 and the offset are printed as registers
-                */
-               struct reginfo src3;
-
-               memset(&src3, 0, sizeof(src3));
-               src1.reg = (reg_t)(cat6->a.src1);
-               src2.reg = (reg_t)(cat6->a.src2);
-               src2.im  = cat6->a.src2_im;
-               src3.reg = (reg_t)(cat6->a.off);
-               src3.full = true;
-               dst.reg  = (reg_t)(cat6->d.dst);
-
-               print_src(ctx, &dst);
-               fprintf(ctx->out, ", g[");
-               print_src(ctx, &src1);
-               fprintf(ctx->out, "+");
-               print_src(ctx, &src3);
-               fprintf(ctx->out, "], ");
-               print_src(ctx, &src2);
-
-               return;
-       }
-       /* generic form: pick the encoding variant with or without offsets */
-       if (cat6->dst_off) {
-               dst.reg = (reg_t)(cat6->c.dst);
-               dstoff  = cat6->c.off;
-       } else {
-               dst.reg = (reg_t)(cat6->d.dst);
-       }
-
-       if (cat6->src_off) {
-               src1.reg = (reg_t)(cat6->a.src1);
-               src1.im  = cat6->a.src1_im;
-               src2.reg = (reg_t)(cat6->a.src2);
-               src2.im  = cat6->a.src2_im;
-               src1off  = cat6->a.off;
-       } else {
-               src1.reg = (reg_t)(cat6->b.src1);
-               src1.im  = cat6->b.src1_im;
-               src2.reg = (reg_t)(cat6->b.src2);
-               src2.im  = cat6->b.src2_im;
-       }
-
-       if (!nodst) {
-               if (sd)
-                       fprintf(ctx->out, "%c[", sd);
-               /* note: dst might actually be a src (ie. address to store to) */
-               print_src(ctx, &dst);
-               if (cat6->dst_off && cat6->g) {
-                       /* with the g bit set the offset field is printed as a register */
-                       struct reginfo dstoff_reg = {0};
-                       dstoff_reg.reg = (reg_t) cat6->c.off;
-                       dstoff_reg.full  = true;
-                       fprintf(ctx->out, "+");
-                       print_src(ctx, &dstoff_reg);
-               } else if (dstoff)
-                       fprintf(ctx->out, "%+d", dstoff);
-               if (sd)
-                       fprintf(ctx->out, "]");
-               fprintf(ctx->out, ", ");
-       }
-
-       if (ss)
-               fprintf(ctx->out, "%c[", ss);
-
-       /* can have a larger than normal immed, so hack: */
-       if (src1.im) {
-               fprintf(ctx->out, "%u", src1.reg.dummy13);
-       } else {
-               print_src(ctx, &src1);
-       }
-
-       /* NOTE(review): in the src_off && g case src2 is printed directly
-        * after src1 with no separator, and (except for resinfo/resfmt) again
-        * after the closing bracket below -- confirm that is intended.
-        */
-       if (cat6->src_off && cat6->g)
-               print_src(ctx, &src2);
-       else if (src1off)
-               fprintf(ctx->out, "%+d", src1off);
-       if (ss)
-               fprintf(ctx->out, "]");
-
-       switch (_OPC(6, cat6->opc)) {
-       case OPC_RESINFO:
-       case OPC_RESFMT:
-               break;
-       default:
-               fprintf(ctx->out, ", ");
-               print_src(ctx, &src2);
-               break;
-       }
-}
-
-/*
- * Print the operands of a category 6 instruction in the newer encoding
- * (print_instr_cat6() calls this when is_cat6_legacy() is false).
- *
- * Output is the type/dimension suffixes (or ".offsetN" for ldc), the
- * descriptor mode, an optional ".baseN" for bindless modes, then src2,
- * src1 and the ssbo operand, in that order.
- */
-static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
-       struct reginfo src1, src2, ssbo;
-       bool uses_type = _OPC(6, cat6->opc) != OPC_LDC;
-
-       static const struct {
-               bool indirect;
-               bool bindless;
-               const char *name;
-       } desc_features[8] = {
-               [CAT6_IMM] = {
-                       .name = "imm"
-               },
-               [CAT6_UNIFORM] = {
-                       .indirect = true,
-                       .name = "uniform"
-               },
-               [CAT6_NONUNIFORM] = {
-                       .indirect = true,
-                       .name = "nonuniform"
-               },
-               [CAT6_BINDLESS_IMM] = {
-                       .bindless = true,
-                       .name = "imm"
-               },
-               [CAT6_BINDLESS_UNIFORM] = {
-                       .bindless = true,
-                       .indirect = true,
-                       .name = "uniform"
-               },
-               [CAT6_BINDLESS_NONUNIFORM] = {
-                       .bindless = true,
-                       .indirect = true,
-                       .name = "nonuniform"
-               },
-       };
-
-       /* Only six of the eight table slots are initialised, so the others
-        * have a NULL name (and false flags).  Passing NULL to "%s" is
-        * undefined, so unknown modes are printed numerically instead.
-        */
-       const unsigned desc_mode = cat6->desc_mode;
-       const bool known_mode =
-               (desc_mode < sizeof(desc_features) / sizeof(desc_features[0])) &&
-               (desc_features[desc_mode].name != NULL);
-
-       bool indirect_ssbo = known_mode && desc_features[desc_mode].indirect;
-       bool bindless = known_mode && desc_features[desc_mode].bindless;
-       bool type_full = cat6->type != TYPE_U16;
-
-       memset(&src1, 0, sizeof(src1));
-       memset(&src2, 0, sizeof(src2));
-       memset(&ssbo, 0, sizeof(ssbo));
-
-       if (uses_type) {
-               fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped");
-               fprintf(ctx->out, ".%dd", cat6->d + 1);
-               fprintf(ctx->out, ".%s", type[cat6->type]);
-       } else {
-               /* ldc: the d field is shown as an offset instead */
-               fprintf(ctx->out, ".offset%d", cat6->d);
-       }
-       fprintf(ctx->out, ".%u", cat6->type_size + 1);
-
-       if (known_mode)
-               fprintf(ctx->out, ".%s", desc_features[desc_mode].name);
-       else
-               fprintf(ctx->out, ".?%u?", desc_mode);
-       if (bindless)
-               fprintf(ctx->out, ".base%d", cat6->base);
-       fprintf(ctx->out, " ");
-
-       src2.reg = (reg_t)(cat6->src2);
-       src2.full = type_full;
-       print_src(ctx, &src2);
-       fprintf(ctx->out, ", ");
-
-       src1.reg = (reg_t)(cat6->src1);
-       src1.full = true; // XXX
-       print_src(ctx, &src1);
-       fprintf(ctx->out, ", ");
-       ssbo.reg = (reg_t)(cat6->ssbo);
-       /* non-indirect modes carry the ssbo index as an immediate */
-       ssbo.im = !indirect_ssbo;
-       ssbo.full = true;
-       print_src(ctx, &ssbo);
-
-       if (debug & PRINT_VERBOSE) {
-               fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)",
-                               cat6->pad1, cat6->pad2, cat6->pad3, cat6->pad4, cat6->pad5);
-       }
-}
-
-static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr)
-{
-       if (!is_cat6_legacy(instr, ctx->gpu_id)) {
-               print_instr_cat6_a6xx(ctx, instr);
-               if (debug & PRINT_VERBOSE)
-                       fprintf(ctx->out, " NEW");
-       } else {
-               print_instr_cat6_a3xx(ctx, instr);
-               if (debug & PRINT_VERBOSE)
-                       fprintf(ctx->out, " LEGACY");
-       }
-}
-static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr)
-{
-       instr_cat7_t *cat7 = &instr->cat7;
-
-       if (cat7->g)
-               fprintf(ctx->out, ".g");
-       if (cat7->l)
-               fprintf(ctx->out, ".l");
-
-       if (_OPC(7, cat7->opc) == OPC_FENCE) {
-               if (cat7->r)
-                       fprintf(ctx->out, ".r");
-               if (cat7->w)
-                       fprintf(ctx->out, ".w");
-       }
-}
-
-/* size of largest OPC field of all the instruction categories: */
-#define NOPC_BITS 6
-
-static const struct opc_info {
-       uint16_t cat;
-       uint16_t opc;
-       const char *name;
-       void (*print)(struct disasm_ctx *ctx, instr_t *instr);
-} opcs[1 << (3+NOPC_BITS)] = {
-#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat }
-       /* category 0: */
-       OPC(0, OPC_NOP,          nop),
-       OPC(0, OPC_B,            b),
-       OPC(0, OPC_JUMP,         jump),
-       OPC(0, OPC_CALL,         call),
-       OPC(0, OPC_RET,          ret),
-       OPC(0, OPC_KILL,         kill),
-       OPC(0, OPC_END,          end),
-       OPC(0, OPC_EMIT,         emit),
-       OPC(0, OPC_CUT,          cut),
-       OPC(0, OPC_CHMASK,       chmask),
-       OPC(0, OPC_CHSH,         chsh),
-       OPC(0, OPC_FLOW_REV,     flow_rev),
-       OPC(0, OPC_PREDT,        predt),
-       OPC(0, OPC_PREDF,        predf),
-       OPC(0, OPC_PREDE,        prede),
-       OPC(0, OPC_BKT,          bkt),
-       OPC(0, OPC_STKS,         stks),
-       OPC(0, OPC_STKR,         stkr),
-       OPC(0, OPC_XSET,         xset),
-       OPC(0, OPC_XCLR,         xclr),
-       OPC(0, OPC_GETONE,       getone),
-       OPC(0, OPC_DBG,          dbg),
-       OPC(0, OPC_SHPS,         shps),
-       OPC(0, OPC_SHPE,         shpe),
-
-       /* category 1: */
-       OPC(1, OPC_MOV, ),
-
-       /* category 2: */
-       OPC(2, OPC_ADD_F,        add.f),
-       OPC(2, OPC_MIN_F,        min.f),
-       OPC(2, OPC_MAX_F,        max.f),
-       OPC(2, OPC_MUL_F,        mul.f),
-       OPC(2, OPC_SIGN_F,       sign.f),
-       OPC(2, OPC_CMPS_F,       cmps.f),
-       OPC(2, OPC_ABSNEG_F,     absneg.f),
-       OPC(2, OPC_CMPV_F,       cmpv.f),
-       OPC(2, OPC_FLOOR_F,      floor.f),
-       OPC(2, OPC_CEIL_F,       ceil.f),
-       OPC(2, OPC_RNDNE_F,      rndne.f),
-       OPC(2, OPC_RNDAZ_F,      rndaz.f),
-       OPC(2, OPC_TRUNC_F,      trunc.f),
-       OPC(2, OPC_ADD_U,        add.u),
-       OPC(2, OPC_ADD_S,        add.s),
-       OPC(2, OPC_SUB_U,        sub.u),
-       OPC(2, OPC_SUB_S,        sub.s),
-       OPC(2, OPC_CMPS_U,       cmps.u),
-       OPC(2, OPC_CMPS_S,       cmps.s),
-       OPC(2, OPC_MIN_U,        min.u),
-       OPC(2, OPC_MIN_S,        min.s),
-       OPC(2, OPC_MAX_U,        max.u),
-       OPC(2, OPC_MAX_S,        max.s),
-       OPC(2, OPC_ABSNEG_S,     absneg.s),
-       OPC(2, OPC_AND_B,        and.b),
-       OPC(2, OPC_OR_B,         or.b),
-       OPC(2, OPC_NOT_B,        not.b),
-       OPC(2, OPC_XOR_B,        xor.b),
-       OPC(2, OPC_CMPV_U,       cmpv.u),
-       OPC(2, OPC_CMPV_S,       cmpv.s),
-       OPC(2, OPC_MUL_U24,      mul.u24),
-       OPC(2, OPC_MUL_S24,      mul.s24),
-       OPC(2, OPC_MULL_U,       mull.u),
-       OPC(2, OPC_BFREV_B,      bfrev.b),
-       OPC(2, OPC_CLZ_S,        clz.s),
-       OPC(2, OPC_CLZ_B,        clz.b),
-       OPC(2, OPC_SHL_B,        shl.b),
-       OPC(2, OPC_SHR_B,        shr.b),
-       OPC(2, OPC_ASHR_B,       ashr.b),
-       OPC(2, OPC_BARY_F,       bary.f),
-       OPC(2, OPC_MGEN_B,       mgen.b),
-       OPC(2, OPC_GETBIT_B,     getbit.b),
-       OPC(2, OPC_SETRM,        setrm),
-       OPC(2, OPC_CBITS_B,      cbits.b),
-       OPC(2, OPC_SHB,          shb),
-       OPC(2, OPC_MSAD,         msad),
-
-       /* category 3: */
-       OPC(3, OPC_MAD_U16,      mad.u16),
-       OPC(3, OPC_MADSH_U16,    madsh.u16),
-       OPC(3, OPC_MAD_S16,      mad.s16),
-       OPC(3, OPC_MADSH_M16,    madsh.m16),
-       OPC(3, OPC_MAD_U24,      mad.u24),
-       OPC(3, OPC_MAD_S24,      mad.s24),
-       OPC(3, OPC_MAD_F16,      mad.f16),
-       OPC(3, OPC_MAD_F32,      mad.f32),
-       OPC(3, OPC_SEL_B16,      sel.b16),
-       OPC(3, OPC_SEL_B32,      sel.b32),
-       OPC(3, OPC_SEL_S16,      sel.s16),
-       OPC(3, OPC_SEL_S32,      sel.s32),
-       OPC(3, OPC_SEL_F16,      sel.f16),
-       OPC(3, OPC_SEL_F32,      sel.f32),
-       OPC(3, OPC_SAD_S16,      sad.s16),
-       OPC(3, OPC_SAD_S32,      sad.s32),
-
-       /* category 4: */
-       OPC(4, OPC_RCP,          rcp),
-       OPC(4, OPC_RSQ,          rsq),
-       OPC(4, OPC_LOG2,         log2),
-       OPC(4, OPC_EXP2,         exp2),
-       OPC(4, OPC_SIN,          sin),
-       OPC(4, OPC_COS,          cos),
-       OPC(4, OPC_SQRT,         sqrt),
-       OPC(4, OPC_HRSQ,         hrsq),
-       OPC(4, OPC_HLOG2,        hlog2),
-       OPC(4, OPC_HEXP2,        hexp2),
-
-       /* category 5: */
-       OPC(5, OPC_ISAM,         isam),
-       OPC(5, OPC_ISAML,        isaml),
-       OPC(5, OPC_ISAMM,        isamm),
-       OPC(5, OPC_SAM,          sam),
-       OPC(5, OPC_SAMB,         samb),
-       OPC(5, OPC_SAML,         saml),
-       OPC(5, OPC_SAMGQ,        samgq),
-       OPC(5, OPC_GETLOD,       getlod),
-       OPC(5, OPC_CONV,         conv),
-       OPC(5, OPC_CONVM,        convm),
-       OPC(5, OPC_GETSIZE,      getsize),
-       OPC(5, OPC_GETBUF,       getbuf),
-       OPC(5, OPC_GETPOS,       getpos),
-       OPC(5, OPC_GETINFO,      getinfo),
-       OPC(5, OPC_DSX,          dsx),
-       OPC(5, OPC_DSY,          dsy),
-       OPC(5, OPC_GATHER4R,     gather4r),
-       OPC(5, OPC_GATHER4G,     gather4g),
-       OPC(5, OPC_GATHER4B,     gather4b),
-       OPC(5, OPC_GATHER4A,     gather4a),
-       OPC(5, OPC_SAMGP0,       samgp0),
-       OPC(5, OPC_SAMGP1,       samgp1),
-       OPC(5, OPC_SAMGP2,       samgp2),
-       OPC(5, OPC_SAMGP3,       samgp3),
-       OPC(5, OPC_DSXPP_1,      dsxpp.1),
-       OPC(5, OPC_DSYPP_1,      dsypp.1),
-       OPC(5, OPC_RGETPOS,      rgetpos),
-       OPC(5, OPC_RGETINFO,     rgetinfo),
-
-
-       /* category 6: */
-       OPC(6, OPC_LDG,          ldg),
-       OPC(6, OPC_LDL,          ldl),
-       OPC(6, OPC_LDP,          ldp),
-       OPC(6, OPC_STG,          stg),
-       OPC(6, OPC_STL,          stl),
-       OPC(6, OPC_STP,          stp),
-       OPC(6, OPC_LDIB,         ldib),
-       OPC(6, OPC_G2L,          g2l),
-       OPC(6, OPC_L2G,          l2g),
-       OPC(6, OPC_PREFETCH,     prefetch),
-       OPC(6, OPC_LDLW,         ldlw),
-       OPC(6, OPC_STLW,         stlw),
-       OPC(6, OPC_RESFMT,       resfmt),
-       OPC(6, OPC_RESINFO,      resinfo),
-       OPC(6, OPC_ATOMIC_ADD,     atomic.add),
-       OPC(6, OPC_ATOMIC_SUB,     atomic.sub),
-       OPC(6, OPC_ATOMIC_XCHG,    atomic.xchg),
-       OPC(6, OPC_ATOMIC_INC,     atomic.inc),
-       OPC(6, OPC_ATOMIC_DEC,     atomic.dec),
-       OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
-       OPC(6, OPC_ATOMIC_MIN,     atomic.min),
-       OPC(6, OPC_ATOMIC_MAX,     atomic.max),
-       OPC(6, OPC_ATOMIC_AND,     atomic.and),
-       OPC(6, OPC_ATOMIC_OR,      atomic.or),
-       OPC(6, OPC_ATOMIC_XOR,     atomic.xor),
-       OPC(6, OPC_LDGB,         ldgb),
-       OPC(6, OPC_STGB,         stgb),
-       OPC(6, OPC_STIB,         stib),
-       OPC(6, OPC_LDC,          ldc),
-       OPC(6, OPC_LDLV,         ldlv),
-
-       OPC(7, OPC_BAR,          bar),
-       OPC(7, OPC_FENCE,        fence),
-
-
-#undef OPC
-};
-
-#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)]))
-
-static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr)
-{
-       const char *name = GETINFO(instr)->name;
-       uint32_t opc = instr_opc(instr, ctx->gpu_id);
-
-       if (name) {
-               fprintf(ctx->out, "%s", name);
-               GETINFO(instr)->print(ctx, instr);
-       } else {
-               fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc);
-
-               switch (instr->opc_cat) {
-               case 0: print_instr_cat0(ctx, instr); break;
-               case 1: print_instr_cat1(ctx, instr); break;
-               case 2: print_instr_cat2(ctx, instr); break;
-               case 3: print_instr_cat3(ctx, instr); break;
-               case 4: print_instr_cat4(ctx, instr); break;
-               case 5: print_instr_cat5(ctx, instr); break;
-               case 6: print_instr_cat6(ctx, instr); break;
-               case 7: print_instr_cat7(ctx, instr); break;
-               }
-       }
-}
-
-static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n)
-{
-       instr_t *instr = (instr_t *)dwords;
-       uint32_t opc = instr_opc(instr, ctx->gpu_id);
-       unsigned nop = 0;
-       unsigned cycles = ctx->stats->instructions;
-
-       fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level],
-                       instr->opc_cat, n, cycles++, dwords[1], dwords[0]);
-
-#if 0
-       /* print unknown bits: */
-       if (debug & PRINT_RAW)
-               fprintf(ctx->out, "[%08xx_%08xx] ", dwords[1] & 0x001ff800, dwords[0] & 0x00000000);
-
-       if (debug & PRINT_VERBOSE)
-               fprintf(ctx->out, "%d,%02d ", instr->opc_cat, opc);
-#endif
-
-       /* NOTE: order flags are printed is a bit fugly.. but for now I
-        * try to match the order in llvm-a3xx disassembler for easy
-        * diff'ing..
-        */
-
-       ctx->repeat = instr_repeat(instr);
-       ctx->stats->instructions += 1 + ctx->repeat;
-       ctx->stats->instlen++;
-
-       if (instr->sync) {
-               fprintf(ctx->out, "(sy)");
-               ctx->stats->sy++;
-       }
-       if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) {
-               fprintf(ctx->out, "(ss)");
-               ctx->stats->ss++;
-       }
-       if (instr->jmp_tgt)
-               fprintf(ctx->out, "(jp)");
-       if ((instr->opc_cat == 0) && instr->cat0.eq)
-               fprintf(ctx->out, "(eq)");
-       if (instr_sat(instr))
-               fprintf(ctx->out, "(sat)");
-       if (ctx->repeat)
-               fprintf(ctx->out, "(rpt%d)", ctx->repeat);
-       else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r))
-               nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r;
-       else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r))
-               nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r;
-       ctx->stats->instructions += nop;
-       ctx->stats->nops += nop;
-       if (opc == OPC_NOP)
-               ctx->stats->nops += 1 + ctx->repeat;
-       if (nop)
-               fprintf(ctx->out, "(nop%d) ", nop);
-
-       if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4)))
-               fprintf(ctx->out, "(ul)");
-
-       print_single_instr(ctx, instr);
-       fprintf(ctx->out, "\n");
-
-       process_reg_dst(ctx);
-
-       if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) {
-               int i;
-               for (i = 0; i < nop; i++) {
-                       fprintf(ctx->out, "%s:%d:%04d:%04d[                   ] ",
-                                       levels[ctx->level], instr->opc_cat, n, cycles++);
-                       fprintf(ctx->out, "nop\n");
-               }
-               for (i = 0; i < ctx->repeat; i++) {
-                       ctx->repeatidx = i + 1;
-                       fprintf(ctx->out, "%s:%d:%04d:%04d[                   ] ",
-                                       levels[ctx->level], instr->opc_cat, n, cycles++);
-
-                       print_single_instr(ctx, instr);
-                       fprintf(ctx->out, "\n");
-               }
-               ctx->repeatidx = 0;
-       }
-
-       return (instr->opc_cat == 0) &&
-               ((opc == OPC_END) || (opc == OPC_CHSH));
-}
-
-int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id)
-{
-       struct shader_stats stats;
-       return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats);
-}
-
-int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
-               unsigned gpu_id, struct shader_stats *stats)
-{
-       struct disasm_ctx ctx;
-       int i;
-       int nop_count = 0;
-       bool has_end = false;
-
-//     ir3_assert((sizedwords % 2) == 0);
-
-       memset(&ctx, 0, sizeof(ctx));
-       ctx.out = out;
-       ctx.level = level;
-       ctx.gpu_id = gpu_id;
-       ctx.stats = stats;
-       memset(ctx.stats, 0, sizeof(*ctx.stats));
-
-       for (i = 0; i < sizedwords; i += 2) {
-               has_end |= print_instr(&ctx, &dwords[i], i/2);
-               if (!has_end)
-                       continue;
-               if (dwords[i] == 0 && dwords[i + 1] == 0)
-                       nop_count++;
-               else
-                       nop_count = 0;
-               if (nop_count > 3)
-                       break;
-       }
-
-       print_reg_stats(&ctx);
-
-       return 0;
-}
diff --git a/src/freedreno/decode/instr-a3xx.h b/src/freedreno/decode/instr-a3xx.h
deleted file mode 100644 (file)
index 218bdc3..0000000
+++ /dev/null
@@ -1,1115 +0,0 @@
-/*
- * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef INSTR_A3XX_H_
-#define INSTR_A3XX_H_
-
-#define PACKED __attribute__((__packed__))
-
-#include <stdint.h>
-#include <stdbool.h>
-#include <assert.h>
-
-void ir3_assert_handler(const char *expr, const char *file, int line,
-               const char *func) __attribute__((weak)) __attribute__ ((__noreturn__));
-
-/* A wrapper for assert() that allows overriding handling of a failed
- * assert.  This is needed for tools like crashdec which can want to
- * attempt to disassemble memory that might not actually be valid
- * instructions.
- */
-#define ir3_assert(expr) do { \
-               if (!(expr)) { \
-                       if (ir3_assert_handler) { \
-                               ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
-                       } \
-                       assert(expr); \
-               } \
-       } while (0)
-
-/* size of largest OPC field of all the instruction categories: */
-#define NOPC_BITS 6
-
-#define _OPC(cat, opc)   (((cat) << NOPC_BITS) | opc)
-
-typedef enum {
-       /* category 0: */
-       OPC_NOP             = _OPC(0, 0),
-       OPC_B               = _OPC(0, 1),
-       OPC_JUMP            = _OPC(0, 2),
-       OPC_CALL            = _OPC(0, 3),
-       OPC_RET             = _OPC(0, 4),
-       OPC_KILL            = _OPC(0, 5),
-       OPC_END             = _OPC(0, 6),
-       OPC_EMIT            = _OPC(0, 7),
-       OPC_CUT             = _OPC(0, 8),
-       OPC_CHMASK          = _OPC(0, 9),
-       OPC_CHSH            = _OPC(0, 10),
-       OPC_FLOW_REV        = _OPC(0, 11),
-
-       OPC_BKT             = _OPC(0, 16),
-       OPC_STKS            = _OPC(0, 17),
-       OPC_STKR            = _OPC(0, 18),
-       OPC_XSET            = _OPC(0, 19),
-       OPC_XCLR            = _OPC(0, 20),
-       OPC_GETONE          = _OPC(0, 21),
-       OPC_DBG             = _OPC(0, 22),
-       OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
-       OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */
-
-       OPC_PREDT           = _OPC(0, 29),   /* predicated true */
-       OPC_PREDF           = _OPC(0, 30),   /* predicated false */
-       OPC_PREDE           = _OPC(0, 31),   /* predicated end */
-
-       /* category 1: */
-       OPC_MOV             = _OPC(1, 0),
-
-       /* category 2: */
-       OPC_ADD_F           = _OPC(2, 0),
-       OPC_MIN_F           = _OPC(2, 1),
-       OPC_MAX_F           = _OPC(2, 2),
-       OPC_MUL_F           = _OPC(2, 3),
-       OPC_SIGN_F          = _OPC(2, 4),
-       OPC_CMPS_F          = _OPC(2, 5),
-       OPC_ABSNEG_F        = _OPC(2, 6),
-       OPC_CMPV_F          = _OPC(2, 7),
-       /* 8 - invalid */
-       OPC_FLOOR_F         = _OPC(2, 9),
-       OPC_CEIL_F          = _OPC(2, 10),
-       OPC_RNDNE_F         = _OPC(2, 11),
-       OPC_RNDAZ_F         = _OPC(2, 12),
-       OPC_TRUNC_F         = _OPC(2, 13),
-       /* 14-15 - invalid */
-       OPC_ADD_U           = _OPC(2, 16),
-       OPC_ADD_S           = _OPC(2, 17),
-       OPC_SUB_U           = _OPC(2, 18),
-       OPC_SUB_S           = _OPC(2, 19),
-       OPC_CMPS_U          = _OPC(2, 20),
-       OPC_CMPS_S          = _OPC(2, 21),
-       OPC_MIN_U           = _OPC(2, 22),
-       OPC_MIN_S           = _OPC(2, 23),
-       OPC_MAX_U           = _OPC(2, 24),
-       OPC_MAX_S           = _OPC(2, 25),
-       OPC_ABSNEG_S        = _OPC(2, 26),
-       /* 27 - invalid */
-       OPC_AND_B           = _OPC(2, 28),
-       OPC_OR_B            = _OPC(2, 29),
-       OPC_NOT_B           = _OPC(2, 30),
-       OPC_XOR_B           = _OPC(2, 31),
-       /* 32 - invalid */
-       OPC_CMPV_U          = _OPC(2, 33),
-       OPC_CMPV_S          = _OPC(2, 34),
-       /* 35-47 - invalid */
-       OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
-       OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
-       OPC_MULL_U          = _OPC(2, 50),
-       OPC_BFREV_B         = _OPC(2, 51),
-       OPC_CLZ_S           = _OPC(2, 52),
-       OPC_CLZ_B           = _OPC(2, 53),
-       OPC_SHL_B           = _OPC(2, 54),
-       OPC_SHR_B           = _OPC(2, 55),
-       OPC_ASHR_B          = _OPC(2, 56),
-       OPC_BARY_F          = _OPC(2, 57),
-       OPC_MGEN_B          = _OPC(2, 58),
-       OPC_GETBIT_B        = _OPC(2, 59),
-       OPC_SETRM           = _OPC(2, 60),
-       OPC_CBITS_B         = _OPC(2, 61),
-       OPC_SHB             = _OPC(2, 62),
-       OPC_MSAD            = _OPC(2, 63),
-
-       /* category 3: */
-       OPC_MAD_U16         = _OPC(3, 0),
-       OPC_MADSH_U16       = _OPC(3, 1),
-       OPC_MAD_S16         = _OPC(3, 2),
-       OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
-       OPC_MAD_U24         = _OPC(3, 4),
-       OPC_MAD_S24         = _OPC(3, 5),
-       OPC_MAD_F16         = _OPC(3, 6),
-       OPC_MAD_F32         = _OPC(3, 7),
-       OPC_SEL_B16         = _OPC(3, 8),
-       OPC_SEL_B32         = _OPC(3, 9),
-       OPC_SEL_S16         = _OPC(3, 10),
-       OPC_SEL_S32         = _OPC(3, 11),
-       OPC_SEL_F16         = _OPC(3, 12),
-       OPC_SEL_F32         = _OPC(3, 13),
-       OPC_SAD_S16         = _OPC(3, 14),
-       OPC_SAD_S32         = _OPC(3, 15),
-
-       /* category 4: */
-       OPC_RCP             = _OPC(4, 0),
-       OPC_RSQ             = _OPC(4, 1),
-       OPC_LOG2            = _OPC(4, 2),
-       OPC_EXP2            = _OPC(4, 3),
-       OPC_SIN             = _OPC(4, 4),
-       OPC_COS             = _OPC(4, 5),
-       OPC_SQRT            = _OPC(4, 6),
-       /* NOTE that these are 8+opc from their highp equivs, so it's possible
-        * that the high order bit in the opc field has been repurposed for
-        * half-precision use?  But note that other ops (rcp/lsin/cos/sqrt)
-        * still use the same opc as highp
-        */
-       OPC_HRSQ            = _OPC(4, 9),
-       OPC_HLOG2           = _OPC(4, 10),
-       OPC_HEXP2           = _OPC(4, 11),
-
-       /* category 5: */
-       OPC_ISAM            = _OPC(5, 0),
-       OPC_ISAML           = _OPC(5, 1),
-       OPC_ISAMM           = _OPC(5, 2),
-       OPC_SAM             = _OPC(5, 3),
-       OPC_SAMB            = _OPC(5, 4),
-       OPC_SAML            = _OPC(5, 5),
-       OPC_SAMGQ           = _OPC(5, 6),
-       OPC_GETLOD          = _OPC(5, 7),
-       OPC_CONV            = _OPC(5, 8),
-       OPC_CONVM           = _OPC(5, 9),
-       OPC_GETSIZE         = _OPC(5, 10),
-       OPC_GETBUF          = _OPC(5, 11),
-       OPC_GETPOS          = _OPC(5, 12),
-       OPC_GETINFO         = _OPC(5, 13),
-       OPC_DSX             = _OPC(5, 14),
-       OPC_DSY             = _OPC(5, 15),
-       OPC_GATHER4R        = _OPC(5, 16),
-       OPC_GATHER4G        = _OPC(5, 17),
-       OPC_GATHER4B        = _OPC(5, 18),
-       OPC_GATHER4A        = _OPC(5, 19),
-       OPC_SAMGP0          = _OPC(5, 20),
-       OPC_SAMGP1          = _OPC(5, 21),
-       OPC_SAMGP2          = _OPC(5, 22),
-       OPC_SAMGP3          = _OPC(5, 23),
-       OPC_DSXPP_1         = _OPC(5, 24),
-       OPC_DSYPP_1         = _OPC(5, 25),
-       OPC_RGETPOS         = _OPC(5, 26),
-       OPC_RGETINFO        = _OPC(5, 27),
-
-       /* category 6: */
-       OPC_LDG             = _OPC(6, 0),        /* load-global */
-       OPC_LDL             = _OPC(6, 1),
-       OPC_LDP             = _OPC(6, 2),
-       OPC_STG             = _OPC(6, 3),        /* store-global */
-       OPC_STL             = _OPC(6, 4),
-       OPC_STP             = _OPC(6, 5),
-       OPC_LDIB            = _OPC(6, 6),
-       OPC_G2L             = _OPC(6, 7),
-       OPC_L2G             = _OPC(6, 8),
-       OPC_PREFETCH        = _OPC(6, 9),
-       OPC_LDLW            = _OPC(6, 10),
-       OPC_STLW            = _OPC(6, 11),
-       OPC_RESFMT          = _OPC(6, 14),
-       OPC_RESINFO         = _OPC(6, 15),
-       OPC_ATOMIC_ADD      = _OPC(6, 16),
-       OPC_ATOMIC_SUB      = _OPC(6, 17),
-       OPC_ATOMIC_XCHG     = _OPC(6, 18),
-       OPC_ATOMIC_INC      = _OPC(6, 19),
-       OPC_ATOMIC_DEC      = _OPC(6, 20),
-       OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
-       OPC_ATOMIC_MIN      = _OPC(6, 22),
-       OPC_ATOMIC_MAX      = _OPC(6, 23),
-       OPC_ATOMIC_AND      = _OPC(6, 24),
-       OPC_ATOMIC_OR       = _OPC(6, 25),
-       OPC_ATOMIC_XOR      = _OPC(6, 26),
-       OPC_LDGB            = _OPC(6, 27),
-       OPC_STGB            = _OPC(6, 28),
-       OPC_STIB            = _OPC(6, 29),
-       OPC_LDC             = _OPC(6, 30),
-       OPC_LDLV            = _OPC(6, 31),
-
-       /* category 7: */
-       OPC_BAR             = _OPC(7, 0),
-       OPC_FENCE           = _OPC(7, 1),
-} opc_t;
-
-#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
-#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
-
-typedef enum {
-       TYPE_F16 = 0,
-       TYPE_F32 = 1,
-       TYPE_U16 = 2,
-       TYPE_U32 = 3,
-       TYPE_S16 = 4,
-       TYPE_S32 = 5,
-       TYPE_U8  = 6,
-       TYPE_S8  = 7,  // XXX I assume?
-} type_t;
-
-static inline uint32_t type_size(type_t type)
-{
-       switch (type) {
-       case TYPE_F32:
-       case TYPE_U32:
-       case TYPE_S32:
-               return 32;
-       case TYPE_F16:
-       case TYPE_U16:
-       case TYPE_S16:
-               return 16;
-       case TYPE_U8:
-       case TYPE_S8:
-               return 8;
-       default:
-               ir3_assert(0); /* invalid type */
-               return 0;
-       }
-}
-
-static inline int type_float(type_t type)
-{
-       return (type == TYPE_F32) || (type == TYPE_F16);
-}
-
-static inline int type_uint(type_t type)
-{
-       return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
-}
-
-static inline int type_sint(type_t type)
-{
-       return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
-}
-
-typedef union PACKED {
-       /* normal gpr or const src register: */
-       struct PACKED {
-               uint32_t comp  : 2;
-               uint32_t num   : 10;
-       };
-       /* for immediate val: */
-       int32_t  iim_val   : 11;
-       /* to make compiler happy: */
-       uint32_t dummy32;
-       uint32_t dummy10   : 10;
-       int32_t  idummy10  : 10;
-       uint32_t dummy11   : 11;
-       uint32_t dummy12   : 12;
-       uint32_t dummy13   : 13;
-       uint32_t dummy8    : 8;
-       int32_t  idummy13  : 13;
-       int32_t  idummy8   : 8;
-} reg_t;
-
-/* special registers: */
-#define REG_A0 61       /* address register */
-#define REG_P0 62       /* predicate register */
-
-static inline int reg_special(reg_t reg)
-{
-       return (reg.num == REG_A0) || (reg.num == REG_P0);
-}
-
-typedef enum {
-       BRANCH_PLAIN = 0,   /* br */
-       BRANCH_OR    = 1,   /* brao */
-       BRANCH_AND   = 2,   /* braa */
-       BRANCH_CONST = 3,   /* brac */
-       BRANCH_ANY   = 4,   /* bany */
-       BRANCH_ALL   = 5,   /* ball */
-       BRANCH_X     = 6,   /* brax ??? */
-} brtype_t;
-
-typedef struct PACKED {
-       /* dword0: */
-       union PACKED {
-               struct PACKED {
-                       int16_t  immed    : 16;
-                       uint32_t dummy1   : 16;
-               } a3xx;
-               struct PACKED {
-                       int32_t  immed    : 20;
-                       uint32_t dummy1   : 12;
-               } a4xx;
-               struct PACKED {
-                       int32_t immed     : 32;
-               } a5xx;
-       };
-
-       /* dword1: */
-       uint32_t idx      : 5;  /* brac.N index */
-       uint32_t brtype   : 3;  /* branch type, see brtype_t */
-       uint32_t repeat   : 3;
-       uint32_t dummy3   : 1;
-       uint32_t ss       : 1;
-       uint32_t inv1     : 1;
-       uint32_t comp1    : 2;
-       uint32_t eq       : 1;
-       uint32_t opc_hi   : 1;  /* at least one bit */
-       uint32_t dummy4   : 2;
-       uint32_t inv0     : 1;
-       uint32_t comp0    : 2;  /* component for first src */
-       uint32_t opc      : 4;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
-} instr_cat0_t;
-
-typedef struct PACKED {
-       /* dword0: */
-       union PACKED {
-               /* for normal src register: */
-               struct PACKED {
-                       uint32_t src : 11;
-                       /* at least low bit of pad must be zero or it will
-                        * look like a address relative src
-                        */
-                       uint32_t pad : 21;
-               };
-               /* for address relative: */
-               struct PACKED {
-                       int32_t  off : 10;
-                       uint32_t src_rel_c : 1;
-                       uint32_t src_rel : 1;
-                       uint32_t unknown : 20;
-               };
-               /* for immediate: */
-               int32_t  iim_val;
-               uint32_t uim_val;
-               float    fim_val;
-       };
-
-       /* dword1: */
-       uint32_t dst        : 8;
-       uint32_t repeat     : 3;
-       uint32_t src_r      : 1;
-       uint32_t ss         : 1;
-       uint32_t ul         : 1;
-       uint32_t dst_type   : 3;
-       uint32_t dst_rel    : 1;
-       uint32_t src_type   : 3;
-       uint32_t src_c      : 1;
-       uint32_t src_im     : 1;
-       uint32_t even       : 1;
-       uint32_t pos_inf    : 1;
-       uint32_t must_be_0  : 2;
-       uint32_t jmp_tgt    : 1;
-       uint32_t sync       : 1;
-       uint32_t opc_cat    : 3;
-} instr_cat1_t;
-
-typedef struct PACKED {
-       /* dword0: */
-       union PACKED {
-               struct PACKED {
-                       uint32_t src1         : 11;
-                       uint32_t must_be_zero1: 2;
-                       uint32_t src1_im      : 1;   /* immediate */
-                       uint32_t src1_neg     : 1;   /* negate */
-                       uint32_t src1_abs     : 1;   /* absolute value */
-               };
-               struct PACKED {
-                       uint32_t src1         : 10;
-                       uint32_t src1_c       : 1;   /* relative-const */
-                       uint32_t src1_rel     : 1;   /* relative address */
-                       uint32_t must_be_zero : 1;
-                       uint32_t dummy        : 3;
-               } rel1;
-               struct PACKED {
-                       uint32_t src1         : 12;
-                       uint32_t src1_c       : 1;   /* const */
-                       uint32_t dummy        : 3;
-               } c1;
-       };
-
-       union PACKED {
-               struct PACKED {
-                       uint32_t src2         : 11;
-                       uint32_t must_be_zero2: 2;
-                       uint32_t src2_im      : 1;   /* immediate */
-                       uint32_t src2_neg     : 1;   /* negate */
-                       uint32_t src2_abs     : 1;   /* absolute value */
-               };
-               struct PACKED {
-                       uint32_t src2         : 10;
-                       uint32_t src2_c       : 1;   /* relative-const */
-                       uint32_t src2_rel     : 1;   /* relative address */
-                       uint32_t must_be_zero : 1;
-                       uint32_t dummy        : 3;
-               } rel2;
-               struct PACKED {
-                       uint32_t src2         : 12;
-                       uint32_t src2_c       : 1;   /* const */
-                       uint32_t dummy        : 3;
-               } c2;
-       };
-
-       /* dword1: */
-       uint32_t dst      : 8;
-       uint32_t repeat   : 2;
-       uint32_t sat      : 1;
-       uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
-       uint32_t ss       : 1;
-       uint32_t ul       : 1;   /* dunno */
-       uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
-       uint32_t ei       : 1;
-       uint32_t cond     : 3;
-       uint32_t src2_r   : 1;   /* doubles as nop1 if repeat==0 */
-       uint32_t full     : 1;   /* not half */
-       uint32_t opc      : 6;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
-} instr_cat2_t;
-
-typedef struct PACKED {
-       /* dword0: */
-       union PACKED {
-               struct PACKED {
-                       uint32_t src1         : 11;
-                       uint32_t must_be_zero1: 2;
-                       uint32_t src2_c       : 1;
-                       uint32_t src1_neg     : 1;
-                       uint32_t src2_r       : 1;  /* doubles as nop1 if repeat==0 */
-               };
-               struct PACKED {
-                       uint32_t src1         : 10;
-                       uint32_t src1_c       : 1;
-                       uint32_t src1_rel     : 1;
-                       uint32_t must_be_zero : 1;
-                       uint32_t dummy        : 3;
-               } rel1;
-               struct PACKED {
-                       uint32_t src1         : 12;
-                       uint32_t src1_c       : 1;
-                       uint32_t dummy        : 3;
-               } c1;
-       };
-
-       union PACKED {
-               struct PACKED {
-                       uint32_t src3         : 11;
-                       uint32_t must_be_zero2: 2;
-                       uint32_t src3_r       : 1;
-                       uint32_t src2_neg     : 1;
-                       uint32_t src3_neg     : 1;
-               };
-               struct PACKED {
-                       uint32_t src3         : 10;
-                       uint32_t src3_c       : 1;
-                       uint32_t src3_rel     : 1;
-                       uint32_t must_be_zero : 1;
-                       uint32_t dummy        : 3;
-               } rel2;
-               struct PACKED {
-                       uint32_t src3         : 12;
-                       uint32_t src3_c       : 1;
-                       uint32_t dummy        : 3;
-               } c2;
-       };
-
-       /* dword1: */
-       uint32_t dst      : 8;
-       uint32_t repeat   : 2;
-       uint32_t sat      : 1;
-       uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
-       uint32_t ss       : 1;
-       uint32_t ul       : 1;
-       uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
-       uint32_t src2     : 8;
-       uint32_t opc      : 4;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
-} instr_cat3_t;
-
-static inline bool instr_cat3_full(instr_cat3_t *cat3)
-{
-       switch (_OPC(3, cat3->opc)) {
-       case OPC_MAD_F16:
-       case OPC_MAD_U16:
-       case OPC_MAD_S16:
-       case OPC_SEL_B16:
-       case OPC_SEL_S16:
-       case OPC_SEL_F16:
-       case OPC_SAD_S16:
-       case OPC_SAD_S32:  // really??
-               return false;
-       default:
-               return true;
-       }
-}
-
-typedef struct PACKED {
-       /* dword0: */
-       union PACKED {
-               struct PACKED {
-                       uint32_t src          : 11;
-                       uint32_t must_be_zero1: 2;
-                       uint32_t src_im       : 1;   /* immediate */
-                       uint32_t src_neg      : 1;   /* negate */
-                       uint32_t src_abs      : 1;   /* absolute value */
-               };
-               struct PACKED {
-                       uint32_t src          : 10;
-                       uint32_t src_c        : 1;   /* relative-const */
-                       uint32_t src_rel      : 1;   /* relative address */
-                       uint32_t must_be_zero : 1;
-                       uint32_t dummy        : 3;
-               } rel;
-               struct PACKED {
-                       uint32_t src          : 12;
-                       uint32_t src_c        : 1;   /* const */
-                       uint32_t dummy        : 3;
-               } c;
-       };
-       uint32_t dummy1   : 16;  /* seem to be ignored */
-
-       /* dword1: */
-       uint32_t dst      : 8;
-       uint32_t repeat   : 2;
-       uint32_t sat      : 1;
-       uint32_t src_r    : 1;
-       uint32_t ss       : 1;
-       uint32_t ul       : 1;
-       uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
-       uint32_t dummy2   : 5;   /* seem to be ignored */
-       uint32_t full     : 1;   /* not half */
-       uint32_t opc      : 6;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
-} instr_cat4_t;
-
-/* With is_bindless_s2en = 1, this determines whether bindless is enabled and
- * if so, how to get the (base, index) pair for both sampler and texture.
- * There is a single base embedded in the instruction, which is always used
- * for the texture.
- */
-typedef enum {
-       /* Use traditional GL binding model, get texture and sampler index
-        * from src3 which is not presumed to be uniform. This is
-        * backwards-compatible with earlier generations, where this field was
-        * always 0 and nonuniform-indexed sampling always worked.
-        */
-       CAT5_NONUNIFORM = 0,
-
-       /* The sampler base comes from the low 3 bits of a1.x, and the sampler
-        * and texture index come from src3 which is presumed to be uniform.
-        */
-       CAT5_BINDLESS_A1_UNIFORM = 1,
-
-       /* The texture and sampler share the same base, and the sampler and
-        * texture index come from src3 which is *not* presumed to be uniform.
-        */
-       CAT5_BINDLESS_NONUNIFORM = 2,
-
-       /* The sampler base comes from the low 3 bits of a1.x, and the sampler
-        * and texture index come from src3 which is *not* presumed to be
-        * uniform.
-        */
-       CAT5_BINDLESS_A1_NONUNIFORM = 3,
-
-       /* Use traditional GL binding model, get texture and sampler index
-        * from src3 which is presumed to be uniform.
-        */
-       CAT5_UNIFORM = 4,
-
-       /* The texture and sampler share the same base, and the sampler and
-        * texture index come from src3 which is presumed to be uniform.
-        */
-       CAT5_BINDLESS_UNIFORM = 5,
-
-       /* The texture and sampler share the same base, get sampler index from low
-        * 4 bits of src3 and texture index from high 4 bits.
-        */
-       CAT5_BINDLESS_IMM = 6,
-
-       /* The sampler base comes from the low 3 bits of a1.x, and the texture
-        * index comes from the next 8 bits of a1.x. The sampler index is an
-        * immediate in src3.
-        */
-       CAT5_BINDLESS_A1_IMM = 7,
-} cat5_desc_mode_t;
-
-typedef struct PACKED {
-       /* dword0: */
-       union PACKED {
-               /* normal case: */
-               struct PACKED {
-                       uint32_t full     : 1;   /* not half */
-                       uint32_t src1     : 8;
-                       uint32_t src2     : 8;
-                       uint32_t dummy1   : 4;   /* seem to be ignored */
-                       uint32_t samp     : 4;
-                       uint32_t tex      : 7;
-               } norm;
-               /* s2en case: */
-               struct PACKED {
-                       uint32_t full         : 1;   /* not half */
-                       uint32_t src1         : 8;
-                       uint32_t src2         : 8;
-                       uint32_t dummy1       : 2;
-                       uint32_t base_hi      : 2;
-                       uint32_t src3         : 8;
-                       uint32_t desc_mode    : 3;
-               } s2en_bindless;
-               /* same in either case: */
-               // XXX I think, confirm this
-               struct PACKED {
-                       uint32_t full     : 1;   /* not half */
-                       uint32_t src1     : 8;
-                       uint32_t src2     : 8;
-                       uint32_t pad      : 15;
-               };
-       };
-
-       /* dword1: */
-       uint32_t dst              : 8;
-       uint32_t wrmask           : 4;   /* write-mask */
-       uint32_t type             : 3;
-       uint32_t base_lo          : 1;   /* used with bindless */
-       uint32_t is_3d            : 1;
-
-       uint32_t is_a             : 1;
-       uint32_t is_s             : 1;
-       uint32_t is_s2en_bindless : 1;
-       uint32_t is_o             : 1;
-       uint32_t is_p             : 1;
-
-       uint32_t opc              : 5;
-       uint32_t jmp_tgt          : 1;
-       uint32_t sync             : 1;
-       uint32_t opc_cat          : 3;
-} instr_cat5_t;
-
-/* dword0 encoding for src_off: [src1 + off], src2: */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t mustbe1  : 1;
-       int32_t  off      : 13;
-       uint32_t src1     : 8;
-       uint32_t src1_im  : 1;
-       uint32_t src2_im  : 1;
-       uint32_t src2     : 8;
-
-       /* dword1: */
-       uint32_t dword1;
-} instr_cat6a_t;
-
-/* dword0 encoding for !src_off: [src1], src2 */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t mustbe0  : 1;
-       uint32_t src1     : 13;
-       uint32_t ignore0  : 8;
-       uint32_t src1_im  : 1;
-       uint32_t src2_im  : 1;
-       uint32_t src2     : 8;
-
-       /* dword1: */
-       uint32_t dword1;
-} instr_cat6b_t;
-
-/* dword1 encoding for dst_off: */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t dword0;
-
-       /* note: there is some weird stuff going on where sometimes
-        * cat6->a.off is involved.. but that seems like a bug in
-        * the blob, since it is used even if !cat6->src_off
-        * It would make sense for there to be some more bits to
-        * bring us to 11 bits worth of offset, but not sure..
-        */
-       int32_t off       : 8;
-       uint32_t mustbe1  : 1;
-       uint32_t dst      : 8;
-       uint32_t pad1     : 15;
-} instr_cat6c_t;
-
-/* dword1 encoding for !dst_off: */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t dword0;
-
-       uint32_t dst      : 8;
-       uint32_t mustbe0  : 1;
-       uint32_t idx      : 8;
-       uint32_t pad0     : 15;
-} instr_cat6d_t;
-
-/* ldgb and atomics..
- *
- * ldgb:      pad0=0, pad3=1
- * atomic .g: pad0=1, pad3=1
- *        .l: pad0=1, pad3=0
- */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t pad0     : 1;
-       uint32_t src3     : 8;
-       uint32_t d        : 2;
-       uint32_t typed    : 1;
-       uint32_t type_size : 2;
-       uint32_t src1     : 8;
-       uint32_t src1_im  : 1;
-       uint32_t src2_im  : 1;
-       uint32_t src2     : 8;
-
-       /* dword1: */
-       uint32_t dst      : 8;
-       uint32_t mustbe0  : 1;
-       uint32_t src_ssbo : 8;
-       uint32_t pad2     : 3;  // type
-       uint32_t g        : 1;
-       uint32_t pad3     : 1;
-       uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
-} instr_cat6ldgb_t;
-
-/* stgb, pad0=0, pad3=2
- */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t mustbe1  : 1;  // ???
-       uint32_t src1     : 8;
-       uint32_t d        : 2;
-       uint32_t typed    : 1;
-       uint32_t type_size : 2;
-       uint32_t pad0     : 9;
-       uint32_t src2_im  : 1;
-       uint32_t src2     : 8;
-
-       /* dword1: */
-       uint32_t src3     : 8;
-       uint32_t src3_im  : 1;
-       uint32_t dst_ssbo : 8;
-       uint32_t pad2     : 3;  // type
-       uint32_t pad3     : 2;
-       uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
-} instr_cat6stgb_t;
-
-typedef union PACKED {
-       instr_cat6a_t a;
-       instr_cat6b_t b;
-       instr_cat6c_t c;
-       instr_cat6d_t d;
-       instr_cat6ldgb_t ldgb;
-       instr_cat6stgb_t stgb;
-       struct PACKED {
-               /* dword0: */
-               uint32_t src_off  : 1;
-               uint32_t pad1     : 31;
-
-               /* dword1: */
-               uint32_t pad2     : 8;
-               uint32_t dst_off  : 1;
-               uint32_t pad3     : 8;
-               uint32_t type     : 3;
-               uint32_t g        : 1;  /* or in some cases it means dst immed */
-               uint32_t pad4     : 1;
-               uint32_t opc      : 5;
-               uint32_t jmp_tgt  : 1;
-               uint32_t sync     : 1;
-               uint32_t opc_cat  : 3;
-       };
-} instr_cat6_t;
-
-/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
- */
-typedef enum {
-       /* Use old GL binding model with an immediate index. */
-       CAT6_IMM = 0,
-
-       CAT6_UNIFORM = 1,
-
-       CAT6_NONUNIFORM = 2,
-
-       /* Use the bindless model, with an immediate index.
-        */
-       CAT6_BINDLESS_IMM = 4,
-
-       /* Use the bindless model, with a uniform register index.
-        */
-       CAT6_BINDLESS_UNIFORM = 5,
-
-       /* Use the bindless model, with a register index that isn't guaranteed
-        * to be uniform. This presumably checks if the indices are equal and
-        * splits up the load/store, because it works the way you would
-        * expect.
-        */
-       CAT6_BINDLESS_NONUNIFORM = 6,
-} cat6_desc_mode_t;
-
-/**
- * For atomic ops (which return a value):
- *
- *    pad1=1, pad3=c, pad5=3
- *    src1    - vecN offset/coords
- *    src2.x  - is actually dest register
- *    src2.y  - is 'data' except for cmpxchg where src2.y is 'compare'
- *              and src2.z is 'data'
- *
- * For stib (which does not return a value):
- *    pad1=0, pad3=c, pad5=2
- *    src1    - vecN offset/coords
- *    src2    - value to store
- *
- * For ldib:
- *    pad1=1, pad3=c, pad5=2
- *    src1    - vecN offset/coords
- *
- * for ldc (load from UBO using descriptor):
- *    pad1=0, pad3=8, pad5=2
- *
- * pad2 and pad5 are only observed to be 0.
- */
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t pad1     : 1;
-       uint32_t base     : 3;
-       uint32_t pad2     : 2;
-       uint32_t desc_mode : 3;
-       uint32_t d        : 2;
-       uint32_t typed    : 1;
-       uint32_t type_size : 2;
-       uint32_t opc      : 5;
-       uint32_t pad3     : 5;
-       uint32_t src1     : 8;  /* coordinate/offset */
-
-       /* dword1: */
-       uint32_t src2     : 8;  /* or the dst for load instructions */
-       uint32_t pad4     : 1;  //mustbe0 ??
-       uint32_t ssbo     : 8;  /* ssbo/image binding point */
-       uint32_t type     : 3;
-       uint32_t pad5     : 7;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
-} instr_cat6_a6xx_t;
-
-typedef struct PACKED {
-       /* dword0: */
-       uint32_t pad1     : 32;
-
-       /* dword1: */
-       uint32_t pad2     : 12;
-       uint32_t ss       : 1;  /* maybe in the encoding, but blob only uses (sy) */
-       uint32_t pad3     : 6;
-       uint32_t w        : 1;  /* write */
-       uint32_t r        : 1;  /* read */
-       uint32_t l        : 1;  /* local */
-       uint32_t g        : 1;  /* global */
-       uint32_t opc      : 4;  /* presumed, but only a couple known OPCs */
-       uint32_t jmp_tgt  : 1;  /* (jp) */
-       uint32_t sync     : 1;  /* (sy) */
-       uint32_t opc_cat  : 3;
-} instr_cat7_t;
-
-typedef union PACKED {
-       instr_cat0_t cat0;
-       instr_cat1_t cat1;
-       instr_cat2_t cat2;
-       instr_cat3_t cat3;
-       instr_cat4_t cat4;
-       instr_cat5_t cat5;
-       instr_cat6_t cat6;
-       instr_cat6_a6xx_t cat6_a6xx;
-       instr_cat7_t cat7;
-       struct PACKED {
-               /* dword0: */
-               uint32_t pad1     : 32;
-
-               /* dword1: */
-               uint32_t pad2     : 12;
-               uint32_t ss       : 1;  /* cat1-cat4 (cat0??) and cat7 (?) */
-               uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
-               uint32_t pad3     : 13;
-               uint32_t jmp_tgt  : 1;
-               uint32_t sync     : 1;
-               uint32_t opc_cat  : 3;
-
-       };
-} instr_t;
-
-static inline uint32_t instr_repeat(instr_t *instr)
-{
-       switch (instr->opc_cat) {
-       case 0:  return instr->cat0.repeat;
-       case 1:  return instr->cat1.repeat;
-       case 2:  return instr->cat2.repeat;
-       case 3:  return instr->cat3.repeat;
-       case 4:  return instr->cat4.repeat;
-       default: return 0;
-       }
-}
-
-static inline bool instr_sat(instr_t *instr)
-{
-       switch (instr->opc_cat) {
-       case 2:  return instr->cat2.sat;
-       case 3:  return instr->cat3.sat;
-       case 4:  return instr->cat4.sat;
-       default: return false;
-       }
-}
-
-/* We can probably drop the gpu_id arg, but keeping it for now so we can
- * assert if we see something we think should be new encoding on an older
- * gpu.
- */
-static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
-{
-       instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
-
-       /* At least one of these two bits is pad in all the possible
-        * "legacy" cat6 encodings, and a analysis of all the pre-a6xx
-        * cmdstream traces I have indicates that the pad bit is zero
-        * in all cases.  So we can use this to detect new encoding:
-        */
-       if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
-               ir3_assert(gpu_id >= 600);
-               ir3_assert(instr->cat6.opc == 0);
-               return false;
-       }
-
-       return true;
-}
-
-static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
-{
-       switch (instr->opc_cat) {
-       case 0:  return instr->cat0.opc | instr->cat0.opc_hi << 4;
-       case 1:  return 0;
-       case 2:  return instr->cat2.opc;
-       case 3:  return instr->cat3.opc;
-       case 4:  return instr->cat4.opc;
-       case 5:  return instr->cat5.opc;
-       case 6:
-               if (!is_cat6_legacy(instr, gpu_id))
-                       return instr->cat6_a6xx.opc;
-               return instr->cat6.opc;
-       case 7:  return instr->cat7.opc;
-       default: return 0;
-       }
-}
-
-static inline bool is_mad(opc_t opc)
-{
-       switch (opc) {
-       case OPC_MAD_U16:
-       case OPC_MAD_S16:
-       case OPC_MAD_U24:
-       case OPC_MAD_S24:
-       case OPC_MAD_F16:
-       case OPC_MAD_F32:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static inline bool is_madsh(opc_t opc)
-{
-       switch (opc) {
-       case OPC_MADSH_U16:
-       case OPC_MADSH_M16:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static inline bool is_atomic(opc_t opc)
-{
-       switch (opc) {
-       case OPC_ATOMIC_ADD:
-       case OPC_ATOMIC_SUB:
-       case OPC_ATOMIC_XCHG:
-       case OPC_ATOMIC_INC:
-       case OPC_ATOMIC_DEC:
-       case OPC_ATOMIC_CMPXCHG:
-       case OPC_ATOMIC_MIN:
-       case OPC_ATOMIC_MAX:
-       case OPC_ATOMIC_AND:
-       case OPC_ATOMIC_OR:
-       case OPC_ATOMIC_XOR:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static inline bool is_ssbo(opc_t opc)
-{
-       switch (opc) {
-       case OPC_RESFMT:
-       case OPC_RESINFO:
-       case OPC_LDGB:
-       case OPC_STGB:
-       case OPC_STIB:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static inline bool is_isam(opc_t opc)
-{
-       switch (opc) {
-       case OPC_ISAM:
-       case OPC_ISAML:
-       case OPC_ISAMM:
-               return true;
-       default:
-               return false;
-       }
-}
-
-
-static inline bool is_cat2_float(opc_t opc)
-{
-       switch (opc) {
-       case OPC_ADD_F:
-       case OPC_MIN_F:
-       case OPC_MAX_F:
-       case OPC_MUL_F:
-       case OPC_SIGN_F:
-       case OPC_CMPS_F:
-       case OPC_ABSNEG_F:
-       case OPC_CMPV_F:
-       case OPC_FLOOR_F:
-       case OPC_CEIL_F:
-       case OPC_RNDNE_F:
-       case OPC_RNDAZ_F:
-       case OPC_TRUNC_F:
-               return true;
-
-       default:
-               return false;
-       }
-}
-
-static inline bool is_cat3_float(opc_t opc)
-{
-       switch (opc) {
-       case OPC_MAD_F16:
-       case OPC_MAD_F32:
-       case OPC_SEL_F16:
-       case OPC_SEL_F32:
-               return true;
-       default:
-               return false;
-       }
-}
-
-int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
-
-#endif /* INSTR_A3XX_H_ */
index 74b3fd6..b666dfc 100644 (file)
@@ -37,10 +37,7 @@ libfreedreno_cffdec = static_library(
     'cffdec.c',
     'cffdec.h',
     'disasm-a2xx.c',
-    'disasm-a3xx.c',
-    'disasm.h',
     'instr-a2xx.h',
-    'instr-a3xx.h',
     'pager.c',
     'pager.h',
     'rnnutil.c',
@@ -48,6 +45,7 @@ libfreedreno_cffdec = static_library(
     'util.h',
   ],
   include_directories: [
+    inc_freedreno,
     inc_freedreno_rnn,
     inc_include,
     inc_src,
@@ -55,7 +53,10 @@ libfreedreno_cffdec = static_library(
   c_args : [ no_override_init_args ],
   gnu_symbol_visibility: 'hidden',
   dependencies: [],
-  link_with: libfreedreno_rnn,
+  link_with: [
+    libfreedreno_rnn,
+    libfreedreno_ir3,  # for disasm_a3xx
+  ],
   build_by_default: false,
 )
 
@@ -85,6 +86,7 @@ if dep_lua.found() and dep_libarchive.found()
       'script.h'
     ],
     include_directories: [
+      inc_freedreno,
       inc_freedreno_rnn,
       inc_include,
       inc_src,
@@ -107,6 +109,7 @@ crashdec = executable(
   'crashdec',
   'crashdec.c',
   include_directories: [
+    inc_freedreno,
     inc_freedreno_rnn,
     inc_include,
     inc_src,
@@ -125,6 +128,7 @@ if dep_libarchive.found()
     'pgmdump',
     'pgmdump.c',
     include_directories: [
+      inc_freedreno,
       inc_include,
       inc_src,
     ],
@@ -133,6 +137,7 @@ if dep_libarchive.found()
     link_with: [
       libfreedreno_cffdec,
       libfreedreno_io,
+      libfreedreno_ir3,  # for disasm_a3xx
     ],
     build_by_default: with_tools.contains('freedreno'),
     install: false,
@@ -141,6 +146,7 @@ if dep_libarchive.found()
     'pgmdump2',
     'pgmdump2.c',
     include_directories: [
+      inc_freedreno,
       inc_include,
       inc_src,
     ],
@@ -149,6 +155,7 @@ if dep_libarchive.found()
     link_with: [
       libfreedreno_cffdec,
       libfreedreno_io,
+      libfreedreno_ir3,  # for disasm_a3xx
     ],
     build_by_default: with_tools.contains('freedreno'),
     install: false,
index ab5deae..932fdb5 100644 (file)
@@ -891,7 +891,7 @@ static void dump_program(struct state *state)
 int main(int argc, char **argv)
 {
        enum rd_sect_type type = RD_NONE;
-       enum debug_t debug = 0;
+       enum debug_t debug = PRINT_RAW | PRINT_STATS;
        void *buf = NULL;
        int sz;
        struct io *io;
@@ -945,7 +945,8 @@ int main(int argc, char **argv)
                return -1;
        }
 
-       disasm_set_debug(debug);
+       disasm_a2xx_set_debug(debug);
+       disasm_a3xx_set_debug(debug);
 
        infile = argv[1];
 
index 7410bcd..94db08d 100644 (file)
@@ -440,7 +440,7 @@ static void dump_program(struct state *state)
 int main(int argc, char **argv)
 {
        enum rd_sect_type type = RD_NONE;
-       enum debug_t debug = 0;
+       enum debug_t debug = PRINT_RAW | PRINT_STATS;
        void *buf = NULL;
        int sz;
        struct io *io;
@@ -494,7 +494,7 @@ int main(int argc, char **argv)
                return -1;
        }
 
-       disasm_set_debug(debug);
+       disasm_a3xx_set_debug(debug);
 
        infile = argv[1];
 
index d6a1c15..b841631 100644 (file)
 
 #include <util/u_debug.h>
 
+#include "disasm.h"
 #include "instr-a3xx.h"
-
-/* bitmask of debug flags */
-enum debug_t {
-       PRINT_RAW      = 0x1,    /* dump raw hexdump */
-       PRINT_VERBOSE  = 0x2,
-       EXPAND_REPEAT  = 0x4,
-};
+#include "regmask.h"
 
 static enum debug_t debug;
 
@@ -80,12 +75,28 @@ struct disasm_ctx {
        int level;
        unsigned gpu_id;
 
+       struct shader_stats *stats;
+
+       /* we have to process the dst register after src to avoid tripping up
+        * the read-before-write detection
+        */
+       unsigned last_dst;
+       bool last_dst_full;
+       bool last_dst_valid;
+
        /* current instruction repeat flag: */
        unsigned repeat;
        /* current instruction repeat indx/offset (for --expand): */
        unsigned repeatidx;
 
-       unsigned instructions;
+       /* tracking for register usage */
+       struct {
+               regmask_t used;
+               regmask_t used_merged;
+               regmask_t rbw;      /* read before write */
+               regmask_t war;      /* write after read */
+               unsigned max_const;
+       } regs;
 };
 
 static const char *float_imms[] = {
@@ -157,6 +168,24 @@ static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full,
        }
 }
 
+/* Set the bit for register index `num` in `regmask`.  `full` selects the
+ * full- vs half-precision view; its inverse is what gets forwarded to
+ * __regmask_set().  An index at or beyond MAX_REG trips ir3_assert().
+ */
+static void regmask_set(regmask_t *regmask, unsigned num, bool full)
+{
+       const bool half = !full;
+
+       ir3_assert(num < MAX_REG);
+       __regmask_set(regmask, half, num);
+}
+
+/* Clear the bit for register index `num` in `regmask`.  `full` selects the
+ * full- vs half-precision view; its inverse is what gets forwarded to
+ * __regmask_clear().  An index at or beyond MAX_REG trips ir3_assert().
+ */
+static void regmask_clear(regmask_t *regmask, unsigned num, bool full)
+{
+       const bool half = !full;
+
+       ir3_assert(num < MAX_REG);
+       __regmask_clear(regmask, half, num);
+}
+
+/* Query the bit for register index `num` in `regmask` and return whatever
+ * __regmask_get() reports for it.  `full` selects the full- vs
+ * half-precision view (passed on inverted).  An index at or beyond MAX_REG
+ * trips ir3_assert().
+ */
+static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full)
+{
+       const bool half = !full;
+
+       ir3_assert(num < MAX_REG);
+       return __regmask_get(regmask, half, num);
+}
+
 static unsigned regidx(reg_t reg)
 {
        return (4 * reg.num) + reg.comp;
@@ -170,8 +199,127 @@ static reg_t idxreg(unsigned idx)
        };
 }
 
+/* Write one run of consecutive register indices to ctx->out: " N" when the
+ * run is a single index, " N-M" otherwise.  A `first` equal to MAX_REG is
+ * the "nothing collected yet" marker that print_regs() starts with, and
+ * produces no output.
+ */
+static void print_sequence(struct disasm_ctx *ctx, int first, int last)
+{
+       if (first == MAX_REG)
+               return;
+
+       if (first != last)
+               fprintf(ctx->out, " %d-%d", first, last);
+       else
+               fprintf(ctx->out, " %d", first);
+}
+
+/* Print every register index set in `regmask` (for the precision selected
+ * by `full`) to ctx->out, collapsing consecutive indices into ranges, and
+ * finish with a " (cnt=..., max=...)" summary.
+ *
+ * Returns the highest set index below 48*4, or 0 when there is none.
+ * Indices at or above 48*4 are still printed and counted but never become
+ * the reported max (presumably the non-GPR range -- confirm).
+ */
+static int print_regs(struct disasm_ctx *ctx, regmask_t *regmask, bool full)
+{
+       int run_start = MAX_REG, run_end = MAX_REG;
+       int highest = 0, total = 0;
+
+       for (int idx = 0; idx < MAX_REG; idx++) {
+               if (!regmask_get(regmask, idx, full))
+                       continue;
+
+               /* not adjacent to the open run: flush it and start a new one */
+               if (idx != (run_end + 1)) {
+                       print_sequence(ctx, run_start, run_end);
+                       run_start = idx;
+               }
+               run_end = idx;
+
+               if (idx < (48*4))
+                       highest = idx;
+               total++;
+       }
+
+       /* flush whatever run is still open (no-op if nothing was set) */
+       print_sequence(ctx, run_start, run_end);
+
+       fprintf(ctx->out, " (cnt=%d, max=%d)", total, highest);
+
+       return highest;
+}
+
+/* Print the register-usage summary gathered while disassembling, followed
+ * by two shaderdb-style stat lines, to ctx->out.  Every line is prefixed
+ * with the indentation string for ctx->level.  The "merged" line is only
+ * emitted for gpu_id >= 600.
+ */
+static void print_reg_stats(struct disasm_ctx *ctx)
+{
+       int fullreg, halfreg;
+
+       fprintf(ctx->out, "%sRegister Stats:\n", levels[ctx->level]);
+       fprintf(ctx->out, "%s- used (half):", levels[ctx->level]);
+       halfreg = print_regs(ctx, &ctx->regs.used, false);
+       fprintf(ctx->out, "\n");
+       fprintf(ctx->out, "%s- used (full):", levels[ctx->level]);
+       fullreg = print_regs(ctx, &ctx->regs.used, true);
+       fprintf(ctx->out, "\n");
+       if (ctx->gpu_id >= 600) {
+               fprintf(ctx->out, "%s- used (merged):", levels[ctx->level]);
+               print_regs(ctx, &ctx->regs.used_merged, false);
+               fprintf(ctx->out, "\n");
+       }
+       fprintf(ctx->out, "%s- input (half):", levels[ctx->level]);
+       print_regs(ctx, &ctx->regs.rbw, false);
+       fprintf(ctx->out, "\n");
+       fprintf(ctx->out, "%s- input (full):", levels[ctx->level]);
+       print_regs(ctx, &ctx->regs.rbw, true);
+       fprintf(ctx->out, "\n");
+       /* this line carries its own newline; no separate "\n" wanted here */
+       fprintf(ctx->out, "%s- max const: %u\n", levels[ctx->level], ctx->regs.max_const);
+       fprintf(ctx->out, "%s- output (half):", levels[ctx->level]);
+       print_regs(ctx, &ctx->regs.war, false);
+       fprintf(ctx->out, "  (estimated)\n");
+       fprintf(ctx->out, "%s- output (full):", levels[ctx->level]);
+       print_regs(ctx, &ctx->regs.war, true);
+       fprintf(ctx->out, "  (estimated)\n");
+
+       /* convert to vec4, which is the granularity that registers are
+        * assigned to shader.  print_regs() hands back the highest used
+        * scalar index, not a count, so the number of vec4's is
+        * (index / 4) + 1.  An index of zero is ambiguous ("only reg 0"
+        * vs "nothing counted"), so consult bit 0 of the mask to tell
+        * the two apart.
+        */
+       fullreg = (fullreg || regmask_get(&ctx->regs.used, 0, true)) ?
+                       (fullreg / 4) + 1 : 0;
+       halfreg = (halfreg || regmask_get(&ctx->regs.used, 0, false)) ?
+                       (halfreg / 4) + 1 : 0;
+
+       // Note this count of instructions includes rptN, which matches
+       // up to how mesa prints this:
+       fprintf(ctx->out, "%s- shaderdb: %d instructions, %d nops, %d non-nops, "
+                       "(%d instlen), %d half, %d full\n",
+                       levels[ctx->level], ctx->stats->instructions, ctx->stats->nops,
+                       ctx->stats->instructions - ctx->stats->nops, ctx->stats->instlen,
+                       halfreg, fullreg);
+       fprintf(ctx->out, "%s- shaderdb: %d (ss), %d (sy)\n", levels[ctx->level],
+                       ctx->stats->ss, ctx->stats->sy);
+}
+
+/* Account for the destination register remembered in ctx->last_dst:
+ * every index covered by the current repeat count is marked in the
+ * write-after-read and used masks, and for gpu_id >= 600 also in the
+ * merged mask (a full register occupies two consecutive merged slots).
+ * Does nothing if no dst is pending; a pending dst that fails VALIDREG()
+ * is skipped without clearing the pending flag.
+ */
+static void process_reg_dst(struct disasm_ctx *ctx)
+{
+       /* nothing pending, or a dummy write (ie. r63.x) that is ignored: */
+       if (!ctx->last_dst_valid || !VALIDREG(ctx->last_dst))
+               return;
+
+       const bool full = ctx->last_dst_full;
+       const bool track_merged = ctx->gpu_id >= 600;
+
+       for (unsigned n = 0; n <= ctx->repeat; n++) {
+               const unsigned idx = ctx->last_dst + n;
+
+               regmask_set(&ctx->regs.war, idx, full);
+               regmask_set(&ctx->regs.used, idx, full);
+
+               if (!track_merged)
+                       continue;
+
+               if (!full) {
+                       regmask_set(&ctx->regs.used_merged, idx, false);
+                       continue;
+               }
+
+               regmask_set(&ctx->regs.used_merged, (idx*2)+0, false);
+               regmask_set(&ctx->regs.used_merged, (idx*2)+1, false);
+       }
+
+       ctx->last_dst_valid = false;
+}
 static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel)
 {
+       /* Remember the dst (its index before the repeat offset is applied
+        * below) together with its precision, so process_reg_dst() can
+        * account for it later.  Relative dsts are not recorded and,
+        * presumably, the special registers a0.c and p0.c don't count..
+        */
+       if (!(addr_rel || (reg.num == REG_A0) || (reg.num == REG_P0))) {
+               ctx->last_dst = regidx(reg);
+               ctx->last_dst_full = full;
+               ctx->last_dst_valid = true;
+       }
        reg = idxreg(regidx(reg) + ctx->repeatidx);
        print_reg(ctx, reg, full, false, false, false, false, false, false, addr_rel);
 }
@@ -196,6 +344,45 @@ static void print_src(struct disasm_ctx *ctx, struct reginfo *info)
 {
        reg_t reg = info->reg;
 
+       /* presumably the special registers a0.c and p0.c don't count.. */
+       if (!(info->addr_rel || info->c || info->im ||
+                       (reg.num == REG_A0) || (reg.num == REG_P0))) {
+               int i, num = regidx(reg);
+               for (i = 0; i <= ctx->repeat; i++) {
+                       unsigned src = num + i;
+
+                       if (!regmask_get(&ctx->regs.used, src, info->full))
+                               regmask_set(&ctx->regs.rbw, src, info->full);
+
+                       regmask_clear(&ctx->regs.war, src, info->full);
+                       regmask_set(&ctx->regs.used, src, info->full);
+
+                       if (info->full) {
+                               regmask_set(&ctx->regs.used_merged, (src*2)+0, false);
+                               regmask_set(&ctx->regs.used_merged, (src*2)+1, false);
+                       } else {
+                               regmask_set(&ctx->regs.used_merged, src, false);
+                       }
+
+                       if (!info->r)
+                               break;
+               }
+       } else if (info->c) {
+               int i, num = regidx(reg);
+               for (i = 0; i <= ctx->repeat; i++) {
+                       unsigned src = num + i;
+
+                       ctx->regs.max_const = MAX2(ctx->regs.max_const, src);
+
+                       if (!info->r)
+                               break;
+               }
+
+               unsigned max = (num + ctx->repeat + 1 + 3) / 4;
+               if (max > ctx->stats->constlen)
+                       ctx->stats->constlen = max;
+       }
+
        if (info->r)
                reg = idxreg(regidx(info->reg) + ctx->repeatidx);
 
@@ -1309,12 +1496,10 @@ static const struct opc_info {
 
 #define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)]))
 
-// XXX hack.. probably should move this table somewhere common:
-#include "ir3.h"
-const char *ir3_instr_name(struct ir3_instruction *instr)
+/* Look up the mnemonic for `opc` in the opcs[] table.  Opcodes for which
+ * opc_cat() yields -1 are reported as "??meta??" instead of being looked
+ * up.  NOTE(review): `opc` indexes opcs[] directly with no range check
+ * here -- confirm callers only pass opcodes the table covers.
+ */
+const char *disasm_a3xx_instr_name(opc_t opc)
 {
-       if (opc_cat(instr->opc) == -1) return "??meta??";
-       return opcs[instr->opc].name;
+       if (opc_cat(opc) == -1) return "??meta??";
+       return opcs[opc].name;
 }
 
 static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr)
@@ -1346,11 +1531,11 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n)
        instr_t *instr = (instr_t *)dwords;
        uint32_t opc = instr_opc(instr, ctx->gpu_id);
        unsigned nop = 0;
-       unsigned cycles = ctx->instructions;
+       unsigned cycles = ctx->stats->instructions;
 
-       if (debug & PRINT_VERBOSE) {
-               fprintf(ctx->out, "%s%04d:%04d[%08xx_%08xx] ", levels[ctx->level],
-                               n, cycles++, dwords[1], dwords[0]);
+       if (debug & PRINT_RAW) {
+               fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level],
+                               instr->opc_cat, n, cycles++, dwords[1], dwords[0]);
        }
 
        /* NOTE: order flags are printed is a bit fugly.. but for now I
@@ -1359,13 +1544,16 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n)
         */
 
        ctx->repeat = instr_repeat(instr);
-       ctx->instructions += 1 + ctx->repeat;
+       ctx->stats->instructions += 1 + ctx->repeat;
+       ctx->stats->instlen++;
 
        if (instr->sync) {
                fprintf(ctx->out, "(sy)");
+               ctx->stats->sy++;
        }
        if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) {
                fprintf(ctx->out, "(ss)");
+               ctx->stats->ss++;
        }
        if (instr->jmp_tgt)
                fprintf(ctx->out, "(jp)");
@@ -1379,7 +1567,10 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n)
                nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r;
        else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r))
                nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r;
-       ctx->instructions += nop;
+       ctx->stats->instructions += nop;
+       ctx->stats->nops += nop;
+       if (opc == OPC_NOP)
+               ctx->stats->nops += 1 + ctx->repeat;
        if (nop)
                fprintf(ctx->out, "(nop%d) ", nop);
 
@@ -1389,20 +1580,22 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n)
        print_single_instr(ctx, instr);
        fprintf(ctx->out, "\n");
 
+       process_reg_dst(ctx);
+
        if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) {
                int i;
                for (i = 0; i < nop; i++) {
                        if (debug & PRINT_VERBOSE) {
-                               fprintf(ctx->out, "%s%04d:%04d[                   ] ",
-                                               levels[ctx->level], n, cycles++);
+                               fprintf(ctx->out, "%s:%d:%04d:%04d[                   ] ",
+                                               levels[ctx->level], instr->opc_cat, n, cycles++);
                        }
                        fprintf(ctx->out, "nop\n");
                }
                for (i = 0; i < ctx->repeat; i++) {
                        ctx->repeatidx = i + 1;
                        if (debug & PRINT_VERBOSE) {
-                               fprintf(ctx->out, "%s%04d:%04d[                   ] ",
-                                               levels[ctx->level], n, cycles++);
+                               fprintf(ctx->out, "%s:%d:%04d:%04d[                   ] ",
+                                               levels[ctx->level], instr->opc_cat, n, cycles++);
                        }
                        print_single_instr(ctx, instr);
                        fprintf(ctx->out, "\n");
@@ -1410,24 +1603,37 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n)
                ctx->repeatidx = 0;
        }
 
-       return (instr->opc_cat == 0) && (opc == OPC_END);
+       return (instr->opc_cat == 0) &&
+               ((opc == OPC_END) || (opc == OPC_CHSH));
 }
 
 int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id)
 {
+       /* Wrapper around disasm_a3xx_stat() for callers that do not want the
+        * stats: they are collected into a local that is discarded on return.
+        */
+       struct shader_stats stats;
+       return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats);
+}
+
+int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
+               unsigned gpu_id, struct shader_stats *stats)
+{
        struct disasm_ctx ctx;
        int i;
        int nop_count = 0;
+       bool has_end = false;
 
-       assert((sizedwords % 2) == 0);
+       ir3_assert((sizedwords % 2) == 0);
 
        memset(&ctx, 0, sizeof(ctx));
        ctx.out = out;
        ctx.level = level;
        ctx.gpu_id = gpu_id;
+       ctx.stats = stats;
+       memset(ctx.stats, 0, sizeof(*ctx.stats));
 
        for (i = 0; i < sizedwords; i += 2) {
-               print_instr(&ctx, &dwords[i], i/2);
+               has_end |= print_instr(&ctx, &dwords[i], i/2);
+               if (!has_end)
+                       continue;
                if (dwords[i] == 0 && dwords[i + 1] == 0)
                        nop_count++;
                else
@@ -1436,5 +1642,13 @@ int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned
                        break;
        }
 
+       if (debug & PRINT_STATS)
+               print_reg_stats(&ctx);
+
        return 0;
 }
+
+/* Replace the debug flag mask consulted by the a3xx+ disassembler (tested
+ * against PRINT_RAW, PRINT_VERBOSE, EXPAND_REPEAT and PRINT_STATS while
+ * disassembling).
+ */
+void disasm_a3xx_set_debug(enum debug_t d)
+{
+       debug = d;
+}
index 4ffcb7a..1b0f708 100644 (file)
 #include <stdbool.h>
 #include <assert.h>
 
+void ir3_assert_handler(const char *expr, const char *file, int line,
+               const char *func) __attribute__((weak)) __attribute__ ((__noreturn__));
+
+/* A wrapper for assert() that allows overriding the handling of a failed
+ * assert.  This is needed for tools like crashdec, which may attempt to
+ * disassemble memory that might not actually hold valid instructions.
+ *
+ * ir3_assert_handler() is declared weak, so the macro tests whether a
+ * definition was linked in before calling it with the stringified
+ * expression and the source location.  The handler is declared noreturn,
+ * so the plain assert() below is only expected to run when no handler is
+ * present.
+ */
+#define ir3_assert(expr) do { \
+               if (!(expr)) { \
+                       if (ir3_assert_handler) { \
+                               ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
+                       } \
+                       assert(expr); \
+               } \
+       } while (0)
 /* size of largest OPC field of all the instruction categories: */
 #define NOPC_BITS 6
 
@@ -249,6 +265,8 @@ typedef enum {
 #define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
 #define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
 
+const char *disasm_a3xx_instr_name(opc_t opc);
+
 typedef enum {
        TYPE_F16 = 0,
        TYPE_F32 = 1,
@@ -275,7 +293,7 @@ static inline uint32_t type_size(type_t type)
        case TYPE_S8:
                return 8;
        default:
-               assert(0); /* invalid type */
+               ir3_assert(0); /* invalid type */
                return 0;
        }
 }
@@ -315,6 +333,21 @@ typedef union PACKED {
        int32_t  idummy8   : 8;
 } reg_t;
 
+/* Pack a register number and a component into a single register id:
+ * comp (masked to two bits) occupies the low two bits, num the bits
+ * above them.
+ *
+ * comp:
+ *   0 - x
+ *   1 - y
+ *   2 - z
+ *   3 - w
+ */
+static inline uint32_t regid(int num, int comp)
+{
+       return (num << 2) | (comp & 0x3);
+}
+
+/* INVALID_REG is the id of register 63, component x; VALIDREG(r) is true
+ * for any other id.
+ */
+#define INVALID_REG      regid(63, 0)
+#define VALIDREG(r)      ((r) != INVALID_REG)
+/* NOTE(review): COND() is defined outside this header -- presumably it
+ * yields val only when the condition holds; confirm against its definition.
+ */
+#define CONDREG(r, val)  COND(VALIDREG(r), (val))
+
 /* special registers: */
 #define REG_A0 61       /* address register */
 #define REG_P0 62       /* predicate register */
@@ -979,8 +1012,8 @@ static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
         * in all cases.  So we can use this to detect new encoding:
         */
        if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
-               assert(gpu_id >= 600);
-               assert(instr->cat6.opc == 0);
+               ir3_assert(gpu_id >= 600);
+               ir3_assert(instr->cat6.opc == 0);
                return false;
        }
 
@@ -1114,6 +1147,4 @@ static inline bool is_cat3_float(opc_t opc)
        }
 }
 
-int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
-
 #endif /* INSTR_A3XX_H_ */
index 1c0799f..0a7ab73 100644 (file)
@@ -631,19 +631,6 @@ bool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags);
                set_foreach ((__instr)->uses, __entry) \
                        if ((__use = (void *)__entry->key))
 
-#define MAX_ARRAYS 16
-
-/* comp:
- *   0 - x
- *   1 - y
- *   2 - z
- *   3 - w
- */
-static inline uint32_t regid(int num, int comp)
-{
-       return (num << 2) | (comp & 0x3);
-}
-
 static inline uint32_t reg_num(struct ir3_register *reg)
 {
        return reg->num >> 2;
@@ -654,10 +641,6 @@ static inline uint32_t reg_comp(struct ir3_register *reg)
        return reg->num & 0x3;
 }
 
-#define INVALID_REG      regid(63, 0)
-#define VALIDREG(r)      ((r) != INVALID_REG)
-#define CONDREG(r, val)  COND(VALIDREG(r), (val))
-
 static inline bool is_flow(struct ir3_instruction *instr)
 {
        return (opc_cat(instr->opc) == 0);
index 458e3d9..115cea0 100644 (file)
@@ -114,7 +114,7 @@ static void print_instr_name(struct ir3_instruction *instr, bool flags)
                printf(".%s%s", type_name(instr->cat1.src_type),
                                type_name(instr->cat1.dst_type));
        } else {
-               printf("%s", ir3_instr_name(instr));
+               printf("%s", disasm_a3xx_instr_name(instr->opc));
                if (instr->flags & IR3_INSTR_3D)
                        printf(".3d");
                if (instr->flags & IR3_INSTR_A)
index 146cc35..99cacbf 100644 (file)
@@ -36,6 +36,8 @@
 #include "ir3_compiler.h"
 #include "ir3_nir.h"
 
+#include "disasm.h"
+
 int
 ir3_glsl_type_size(const struct glsl_type *type, bool bindless)
 {
index 6eade8d..4dded90 100644 (file)
@@ -87,6 +87,31 @@ __regmask_set(regmask_t *regmask, bool half, unsigned n)
 }
 
 static inline void
+__regmask_clear(regmask_t *regmask, bool half, unsigned n)
+{
+       /* Clear the bit(s) in regmask that track register n, where 'half'
+        * selects between a half- and a full-precision register.
+        */
+       if (regmask->mergedregs) {
+               /* a6xx+ case, with merged register file, we track things in terms
+                * of half-precision registers, with a full-precision register
+                * using two half-precision slots:
+                */
+               if (half) {
+                       BITSET_CLEAR(regmask->mask, n);
+               } else {
+                       n *= 2;
+                       BITSET_CLEAR(regmask->mask, n);
+                       BITSET_CLEAR(regmask->mask, n+1);
+               }
+       } else {
+               /* pre a6xx case, with separate register file for half and full
+                * precision; half registers are tracked at an offset of MAX_REG
+                * within the same mask:
+                */
+               if (half)
+                       n += MAX_REG;
+               BITSET_CLEAR(regmask->mask, n);
+       }
+}
+
+static inline void
 regmask_init(regmask_t *regmask, bool mergedregs)
 {
        memset(&regmask->mask, 0, sizeof(regmask->mask));
index e6f8f7b..1b16fad 100644 (file)
@@ -35,7 +35,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "util/macros.h"
-#include "instr-a3xx.h"
+#include "disasm.h"
 
 #define INSTR_5XX(i, d) { .gpu_id = 540, .instr = #i, .expected = d }
 #define INSTR_6XX(i, d) { .gpu_id = 630, .instr = #i, .expected = d }
index 3df6dfb..6a1a16e 100644 (file)
@@ -18,7 +18,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-inc_freedreno = include_directories(['.', './registers'])
+inc_freedreno = include_directories(['.', './registers', './common'])
 inc_freedreno_rnn = include_directories('rnn')
 
 subdir('common')
index b564730..43c6333 100644 (file)
@@ -112,7 +112,7 @@ static void print_export_comment(uint32_t num, gl_shader_stage type)
                }
                break;
        default:
-               unreachable("not reached");
+               assert(!"not reached");
        }
        /* if we had a symbol table here, we could look
         * up the name of the varying..
@@ -629,7 +629,7 @@ int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage typ
        return 0;
 }
 
-void disasm_set_debug(enum debug_t d)
+/* Replace the debug flag mask used by the a2xx disassembler. */
+void disasm_a2xx_set_debug(enum debug_t d)
 {
        debug = d;
 }
diff --git a/src/gallium/drivers/freedreno/disasm.h b/src/gallium/drivers/freedreno/disasm.h
deleted file mode 100644 (file)
index dc29b2f..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright © 2012 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef DISASM_H_
-#define DISASM_H_
-
-#include <stdio.h>
-#include <stdbool.h>
-
-#include "compiler/shader_enums.h"
-#include "util/u_debug.h"
-
-/* bitmask of debug flags */
-enum debug_t {
-       PRINT_RAW      = 0x1,    /* dump raw hexdump */
-       PRINT_VERBOSE  = 0x2,
-};
-
-int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage type);
-int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
-void disasm_set_debug(enum debug_t debug);
-
-#endif /* DISASM_H_ */
index bb1ffa6..8179e78 100644 (file)
@@ -19,7 +19,6 @@
 # SOFTWARE.
 
 files_libfreedreno = files(
-  'disasm.h',
   'freedreno_batch.c',
   'freedreno_batch.h',
   'freedreno_batch_cache.c',