[AMDGPU] llvm-objdump: Minimal HSA Code Object disassembler support.
authorValery Pykhtin <Valery.Pykhtin@amd.com>
Thu, 7 Apr 2016 07:24:01 +0000 (07:24 +0000)
committerValery Pykhtin <Valery.Pykhtin@amd.com>
Thu, 7 Apr 2016 07:24:01 +0000 (07:24 +0000)
Reenable reverted r265550 with the endianness issue fixed. Variables of
endian-aware types such as ulittle32_t should be explicitly cast
to their natural equivalent types before being passed as varargs to
printf-like functions (format in my case). Added a lit config file
that depends on the AMDGPU target, as the testcase uses the assembler.

Differential revision: http://reviews.llvm.org/D16998

llvm-svn: 265645

llvm/test/Object/AMDGPU/lit.local.cfg [new file with mode: 0644]
llvm/test/Object/AMDGPU/objdump.s [new file with mode: 0644]
llvm/tools/llvm-objdump/llvm-objdump.cpp

diff --git a/llvm/test/Object/AMDGPU/lit.local.cfg b/llvm/test/Object/AMDGPU/lit.local.cfg
new file mode 100644 (file)
index 0000000..2a665f0
--- /dev/null
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Object/AMDGPU/objdump.s b/llvm/test/Object/AMDGPU/objdump.s
new file mode 100644 (file)
index 0000000..997bcb1
--- /dev/null
@@ -0,0 +1,75 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=tonga %s -filetype=obj | llvm-objdump -disassemble -arch-name=amdgcn -mcpu=tonga - | FileCheck %s
+
+       .text
+
+       .amdgpu_hsa_kernel hello_world
+hello_world:
+  .amd_kernel_code_t
+  .end_amd_kernel_code_t
+
+       s_mov_b32 m0, 0x10000
+       s_load_dwordx2 s[0:1], s[4:5], 0x8
+       s_waitcnt lgkmcnt(0)
+       s_add_u32 s0, s7, s0
+       v_add_i32_e32 v1, vcc, s0, v1
+       s_movk_i32 s0, 0x483
+       v_cmp_ge_i32_e32 vcc, s0, v0
+       s_and_saveexec_b64 s[0:1], vcc
+       v_lshlrev_b32_e32 v4, 2, v0
+       s_cbranch_execz 21
+       s_mov_b64 s[2:3], exec
+       s_mov_b64 s[10:11], exec
+       v_mov_b32_e32 v3, v0
+        s_endpgm
+
+       .amdgpu_hsa_kernel hello_world2
+hello_world2:
+  .amd_kernel_code_t
+  .end_amd_kernel_code_t
+
+       s_and_saveexec_b64 s[0:1], vcc
+       s_cbranch_execz 85
+       s_load_dwordx4 s[8:11], s[4:5], 0x40
+       v_ashrrev_i32_e32 v77, 31, v76
+       v_lshlrev_b64 v[10:11], 2, v[76:77]
+       s_waitcnt lgkmcnt(0)
+       v_add_i32_e32 v10, vcc, s8, v10
+       v_mov_b32_e32 v6, s9
+       v_addc_u32_e32 v11, vcc, v6, v11, vcc
+       flat_load_dword v0, v[10:11]
+       v_lshlrev_b32_e32 v6, 5, v8
+       v_lshlrev_b32_e32 v7, 2, v7
+        s_endpgm
+
+// CHECK:  file format ELF64-amdgpu-hsacobj
+// CHECK:  Disassembly of section .hsatext:
+// CHECK:  hello_world:
+// CHECK:  s_mov_b32 m0, 0x10000                                      // 000000000100: BEFC00FF 00010000
+// CHECK:  s_load_dwordx2 s[0:1], s[4:5], 0x8                         // 000000000108: C0060002 00000008
+// CHECK:  s_waitcnt lgkmcnt(0)                                       // 000000000110: BF8C007F
+// CHECK:  s_add_u32 s0, s7, s0                                       // 000000000114: 80000007
+// CHECK:  v_add_i32_e32 v1, vcc, s0, v1                              // 000000000118: 32020200
+// CHECK:  s_movk_i32 s0, 0x483                                       // 00000000011C: B0000483
+// CHECK:  v_cmp_ge_i32_e32 vcc, s0, v0                               // 000000000120: 7D8C0000
+// CHECK:  s_and_saveexec_b64 s[0:1], vcc                             // 000000000124: BE80206A
+// CHECK:  v_lshlrev_b32_e32 v4, 2, v0                                // 000000000128: 24080082
+// CHECK:  s_cbranch_execz 21                                         // 00000000012C: BF880015
+// CHECK:  s_mov_b64 s[2:3], exec                                     // 000000000130: BE82017E
+// CHECK:  s_mov_b64 s[10:11], exec                                   // 000000000134: BE8A017E
+// CHECK:  v_mov_b32_e32 v3, v0                                       // 000000000138: 7E060300
+// CHECK:  s_endpgm                                                   // 00000000013C: BF810000
+
+// CHECK:  hello_world2:
+// CHECK:  s_and_saveexec_b64 s[0:1], vcc                             // 000000000240: BE80206A
+// CHECK:  s_cbranch_execz 85                                         // 000000000244: BF880055
+// CHECK:  s_load_dwordx4 s[8:11], s[4:5], 0x40                       // 000000000248: C00A0202 00000040
+// CHECK:  v_ashrrev_i32_e32 v77, 31, v76                             // 000000000250: 229A989F
+// CHECK:  v_lshlrev_b64 v[10:11], 2, v[76:77]                        // 000000000254: D28F000A 00029882
+// CHECK:  s_waitcnt lgkmcnt(0)                                       // 00000000025C: BF8C007F
+// CHECK:  v_add_i32_e32 v10, vcc, s8, v10                            // 000000000260: 32141408
+// CHECK:  v_mov_b32_e32 v6, s9                                       // 000000000264: 7E0C0209
+// CHECK:  v_addc_u32_e32 v11, vcc, v6, v11, vcc                      // 000000000268: 38161706
+// CHECK:  flat_load_dword v0, v[10:11]                               // 00000000026C: DC500000 0000000A
+// CHECK:  v_lshlrev_b32_e32 v6, 5, v8                                // 000000000274: 240C1085
+// CHECK:  v_lshlrev_b32_e32 v7, 2, v7                                // 000000000278: 240E0E82
+// CHECK:  s_endpgm                                                   // 00000000027C: BF810000
index 38de13b..6c26b45 100644 (file)
@@ -395,12 +395,43 @@ public:
   }
 };
 HexagonPrettyPrinter HexagonPrettyPrinterInst;
+
+class AMDGCNPrettyPrinter : public PrettyPrinter {
+public:
+  void printInst(MCInstPrinter &IP,
+                 const MCInst *MI,
+                 ArrayRef<uint8_t> Bytes,
+                 uint64_t Address,
+                 raw_ostream &OS,
+                 StringRef Annot,
+                 MCSubtargetInfo const &STI) override {
+    SmallString<40> InstStr;
+    raw_svector_ostream IS(InstStr);
+
+    IP.printInst(MI, IS, "", STI);
+
+    OS << left_justify(IS.str(), 60) << format("// %012X: ", Address);
+    typedef support::ulittle32_t U32;
+    for (auto D : makeArrayRef(reinterpret_cast<const U32*>(Bytes.data()),
+                               Bytes.size() / sizeof(U32)))
+      // D should be explicitly casted to uint32_t here as it is passed
+      // by format to snprintf as vararg.
+      OS << format("%08X ", static_cast<uint32_t>(D));
+
+    if (!Annot.empty())
+      OS << "// " << Annot;
+  }
+};
+AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst;
+
 PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
   switch(Triple.getArch()) {
   default:
     return PrettyPrinterInst;
   case Triple::hexagon:
     return HexagonPrettyPrinterInst;
+  case Triple::amdgcn: // amdgcn output carries an address/encoding comment
+    return AMDGCNPrettyPrinterInst;
   }
 }
 }
@@ -1057,6 +1088,18 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
       if (Start >= End)
         continue;
 
+      if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
+        // round the size down to a multiple of 4 bytes
+        End = Start + ((End - Start) & ~0x3ull);
+        Start += 256; // add sizeof(amd_kernel_code_t)
+        // trim trailing zero dwords, but at most 256 bytes (align)
+        const uint64_t EndAlign = 256;
+        const auto Limit = End - (std::min)(EndAlign, End - Start);
+        while (End > Limit &&
+          *reinterpret_cast<const support::ulittle32_t*>(&Bytes[End - 4]) == 0)
+          End -= 4;
+      }
+
       outs() << '\n' << Symbols[si].second << ":\n";
 
 #ifndef NDEBUG