radeonsi: write shader asm annotated with wave info into GPU hang reports
author Marek Olšák <marek.olsak@amd.com>
Tue, 29 Nov 2016 14:35:11 +0000 (15:35 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 10 Feb 2017 10:27:50 +0000 (11:27 +0100)
commit 28c06b3ceb7455538da301e03c9e704902cc1056
tree 999fdb8f7702106c7a369d1cb36c5f4c783f3473
parent 3de8c5a3c59fa79b139d9630c82fc4537aa3e004
radeonsi: write shader asm annotated with wave info into GPU hang reports

Note that the disassembly is written twice: first the unmodified compiler
output, and then the wave-annotated output, which is written only if there
are waves executing the shader.

Sample output from a real GPU hang most likely caused by image_sample:

The number of active waves = 28

Pixel Shader - annotated disassembly:
    s_mov_b64 s[6:7], exec                                        ; BE86017E [PC=0x10f3e3800, off=0, size=4]
    s_wqm_b64 exec, exec                                          ; BEFE077E [PC=0x10f3e3804, off=4, size=4]
...
    image_sample v[7:9], v[0:1], s[12:19], s[20:23] dmask:0x7     ; F0800700 00A30700 [PC=0x10f3e3a94, off=660, size=8]
    s_buffer_load_dword s20, s[0:3], 0x50                         ; C0220500 00000050 [PC=0x10f3e3a9c, off=668, size=8]
    s_load_dwordx4 s[24:27], s[4:5], 0x170                        ; C00A0602 00000170 [PC=0x10f3e3aa4, off=676, size=8]
    s_load_dwordx8 s[12:19], s[4:5], 0x140                        ; C00E0302 00000140 [PC=0x10f3e3aac, off=684, size=8]
    s_buffer_load_dword s11, s[0:3], 0x5c                         ; C02202C0 0000005C [PC=0x10f3e3ab4, off=692, size=8]
    s_buffer_load_dword s21, s[0:3], 0x54                         ; C0220540 00000054 [PC=0x10f3e3abc, off=700, size=8]
    s_buffer_load_dword s22, s[0:3], 0x58                         ; C0220580 00000058 [PC=0x10f3e3ac4, off=708, size=8]
    s_waitcnt vmcnt(0)                                            ; BF8C0F70 [PC=0x10f3e3acc, off=716, size=4]
          ^ SE0 SH0 CU1 SIMD1 WAVE0  EXEC=aaaaaaa555aaaaaa  INST32=BF8C0F70
          ^ SE0 SH0 CU1 SIMD2 WAVE0  EXEC=aaaa85555555552a  INST32=BF8C0F70
          ^ SE0 SH0 CU1 SIMD3 WAVE0  EXEC=000000000000000a  INST32=BF8C0F70
          ^ SE0 SH0 CU6 SIMD1 WAVE0  EXEC=25a5a5aa82aaaaaa  INST32=BF8C0F70
          ^ SE0 SH0 CU6 SIMD3 WAVE0  EXEC=50aaaa8fffa55555  INST32=BF8C0F70
          ^ SE0 SH0 CU7 SIMD0 WAVE0  EXEC=5554aaaaaaa1a555  INST32=BF8C0F70
          ^ SE0 SH0 CU7 SIMD0 WAVE1  EXEC=aaaa5555ffffffff  INST32=BF8C0F70
          ^ SE0 SH0 CU7 SIMD1 WAVE0  EXEC=555557aaaaaaaaa5  INST32=BF8C0F70
          ^ SE0 SH0 CU7 SIMD3 WAVE0  EXEC=5555aaaaaaaaaa85  INST32=BF8C0F70
          ^ SE1 SH0 CU3 SIMD1 WAVE0  EXEC=aaaaaaaaaaaaaaaa  INST32=BF8C0F70
          ^ SE1 SH0 CU4 SIMD0 WAVE0  EXEC=aaaaaaaa5a5a5a5a  INST32=BF8C0F70
          ^ SE1 SH0 CU4 SIMD1 WAVE0  EXEC=aaaaaaa5a5a5a4a5  INST32=BF8C0F70
          ^ SE1 SH0 CU4 SIMD2 WAVE0  EXEC=5555555000000000  INST32=BF8C0F70
          ^ SE1 SH0 CU4 SIMD3 WAVE0  EXEC=aa555554155aaaaa  INST32=BF8C0F70
          ^ SE1 SH0 CU5 SIMD0 WAVE0  EXEC=55ffff55555555aa  INST32=BF8C0F70
          ^ SE1 SH0 CU5 SIMD1 WAVE0  EXEC=555555555aaaaaaa  INST32=BF8C0F70
          ^ SE1 SH0 CU5 SIMD2 WAVE0  EXEC=a0aaaaaaa8555555  INST32=BF8C0F70
          ^ SE1 SH0 CU5 SIMD3 WAVE0  EXEC=8aaaaaaaaaaaa555  INST32=BF8C0F70
          ^ SE1 SH0 CU6 SIMD0 WAVE0  EXEC=000000002aaaaaaa  INST32=BF8C0F70
          ^ SE2 SH0 CU1 SIMD0 WAVE0  EXEC=5aaaa5400aaaa15a  INST32=BF8C0F70
          ^ SE2 SH0 CU1 SIMD1 WAVE0  EXEC=00aaaaaaaa5555aa  INST32=BF8C0F70
          ^ SE2 SH0 CU1 SIMD2 WAVE0  EXEC=aa00005555554555  INST32=BF8C0F70
          ^ SE2 SH0 CU1 SIMD3 WAVE0  EXEC=aaaaaaa000000000  INST32=BF8C0F70
          ^ SE3 SH0 CU4 SIMD0 WAVE0  EXEC=5555aaaaaaaaaaaa  INST32=BF8C0F70
          ^ SE3 SH0 CU4 SIMD2 WAVE0  EXEC=ffaaaaaaaaaa5555  INST32=BF8C0F70
          ^ SE3 SH0 CU4 SIMD3 WAVE0  EXEC=aaaa55555555aa00  INST32=BF8C0F70
          ^ SE3 SH0 CU5 SIMD0 WAVE0  EXEC=00aaaaaaaaaaaa5a  INST32=BF8C0F70
          ^ SE3 SH0 CU5 SIMD1 WAVE0  EXEC=5a555555005555ff  INST32=BF8C0F70
    v_mul_f32_e32 v7, s6, v7                                      ; 0A0E0E06 [PC=0x10f3e3ad0, off=720, size=4]
...

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_debug.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h