aco: fix disassembly of SMEM with both SGPR and constant offset
author: Rhys Perry <pendingchaos02@gmail.com>
Tue, 12 Apr 2022 14:19:40 +0000 (15:19 +0100)
committer: Marge Bot <emma+marge@anholt.net>
Thu, 14 Apr 2022 20:58:36 +0000 (20:58 +0000)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15890>

src/amd/compiler/aco_print_asm.cpp
src/amd/compiler/tests/test_assembler.cpp

index 673f415..7c2141c 100644 (file)
@@ -305,6 +305,23 @@ disasm_instr(chip_class chip, LLVMDisasmContextRef disasm, uint32_t* binary, uns
       size = l / 4;
    }
 
+   /* See: https://github.com/GPUOpen-Tools/radeon_gpu_profiler/issues/65 and
+    * https://github.com/llvm/llvm-project/issues/38652
+    */
+   if (chip == GFX9 && (binary[pos] & 0xfc024000) == 0xc0024000) {
+      /* SMEM with IMM=1 and SOE=1: LLVM ignores SOFFSET */
+      size_t len = strlen(outline);
+      snprintf(outline + len, outline_size - len, ", s%u", binary[pos + 1] >> 25);
+   } else if (chip >= GFX10 && (binary[pos] & 0xfc000000) == 0xf4000000 &&
+              (binary[pos + 1] & 0xfe000000) != 0xfa000000) {
+      /* SMEM non-NULL SOFFSET: LLVM ignores OFFSET */
+      uint32_t offset = binary[pos + 1] & 0x1fffff;
+      if (offset) {
+         size_t len = strlen(outline);
+         snprintf(outline + len, outline_size - len, ", 0x%x", offset);
+      }
+   }
+
    return std::make_pair(invalid, size);
 }
 
index e970eba..e068aca 100644 (file)
@@ -266,3 +266,28 @@ BEGIN_TEST(assembler.v_add3_clamp)
       finish_assembler_test();
    }
 END_TEST
+
+BEGIN_TEST(assembler.smem_offset)
+   for (unsigned i = GFX9; i <= GFX10; i++) {
+      if (!setup_cs(NULL, (chip_class)i))
+         continue;
+
+      Definition dst(PhysReg(7), s1);
+      Operand sbase(PhysReg(6), s2);
+      Operand offset(PhysReg(5), s1);
+
+      //~gfx9>> s_load_dword s7, s[6:7], s5 ; c00001c3 00000005
+      //~gfx10>> s_load_dword s7, s[6:7], s5 ; f40001c3 0a000000
+      bld.smem(aco_opcode::s_load_dword, dst, sbase, offset);
+      //~gfx9! s_load_dword s7, s[6:7], 0x42 ; c00201c3 00000042
+      //~gfx10! s_load_dword s7, s[6:7], 0x42 ; f40001c3 fa000042
+      bld.smem(aco_opcode::s_load_dword, dst, sbase, Operand::c32(0x42));
+      if (i >= GFX9) {
+         //~gfx9! s_load_dword s7, s[6:7], 0x42, s5 ; c00241c3 0a000042
+         //~gfx10! s_load_dword s7, s[6:7], s5, 0x42 ; f40001c3 0a000042
+         bld.smem(aco_opcode::s_load_dword, dst, sbase, Operand::c32(0x42), offset);
+      }
+
+      finish_assembler_test();
+   }
+END_TEST