[NFC][AMDGPU] Refactor AMDGPUDisassembler
authorScott Linder <Scott.Linder@amd.com>
Tue, 20 Sep 2022 20:05:19 +0000 (20:05 +0000)
committerScott Linder <Scott.Linder@amd.com>
Tue, 20 Sep 2022 20:37:19 +0000 (20:37 +0000)
Clean up ahead of a patch to fix bugs in the AMDGPUDisassembler.

Use lit.local.cfg substitutions and more idiomatic use of split-file to
simplify and extend existing kernel-descriptor disassembly tests.

Add a comment to AMDHSAKernelDescriptor.h, as at least one small set
towards keeping all kernel-descriptor sensitive code in sync.

Reviewed By: kzhuravl, arsenm

Differential Revision: https://reviews.llvm.org/D130105

llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s
llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s
llvm/test/tools/llvm-objdump/ELF/AMDGPU/lit.local.cfg

index 61b0574..f56f231 100644 (file)
@@ -9,6 +9,13 @@
 /// \file
 /// AMDHSA kernel descriptor definitions. For more information, visit
 /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
+///
+/// \warning
+/// Any changes to this file should also be audited for corresponding changes
+/// needed in both the assembler and disassembler, namely:
+/// * AMDGPUAsmPrinter.{cpp,h}
+/// * AMDGPUTargetStreamer.{cpp,h}
+/// * AMDGPUDisassembler.{cpp,h}
 //
 //===----------------------------------------------------------------------===//
 
index 9dec1fe..f4a5a38 100644 (file)
@@ -1768,10 +1768,10 @@ bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
 //===----------------------------------------------------------------------===//
 // AMDGPU specific symbol handling
 //===----------------------------------------------------------------------===//
+#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
 #define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
   do {                                                                         \
-    KdStream << Indent << DIRECTIVE " "                                        \
-             << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';           \
+    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
   } while (0)
 
 // NOLINTNEXTLINE(readability-identifier-naming)
@@ -1786,8 +1786,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
   // simply calculate the inverse of what the assembler does.
 
   uint32_t GranulatedWorkitemVGPRCount =
-      (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
-      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;
+      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
 
   uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *
                           AMDGPU::IsaInfo::getVGPREncodingGranule(&STI);
@@ -1814,8 +1813,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
   // The disassembler cannot recover the original values of those 3 directives.
 
   uint32_t GranulatedWavefrontSGPRCount =
-      (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
-      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;
+      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
 
   if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
     return MCDisassembler::Fail;
@@ -1925,7 +1923,17 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
   return MCDisassembler::Success;
 }
 
+// NOLINTNEXTLINE(readability-identifier-naming)
+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
+    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
+  using namespace amdhsa;
+  if (!isGFX10Plus() && FourByteBuffer) {
+    return MCDisassembler::Fail;
+  }
+  return MCDisassembler::Success;
+}
 #undef PRINT_DIRECTIVE
+#undef GET_FIELD
 
 MCDisassembler::DecodeStatus
 AMDGPUDisassembler::decodeKernelDescriptorDirective(
@@ -1993,30 +2001,16 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
     return MCDisassembler::Success;
 
   case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
-    // COMPUTE_PGM_RSRC3
-    //  - Only set for GFX10, GFX6-9 have this to be 0.
-    //  - Currently no directives directly control this.
     FourByteBuffer = DE.getU32(Cursor);
-    if (!isGFX10Plus() && FourByteBuffer) {
-      return MCDisassembler::Fail;
-    }
-    return MCDisassembler::Success;
+    return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
 
   case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
     FourByteBuffer = DE.getU32(Cursor);
-    if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) ==
-        MCDisassembler::Fail) {
-      return MCDisassembler::Fail;
-    }
-    return MCDisassembler::Success;
+    return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
 
   case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
     FourByteBuffer = DE.getU32(Cursor);
-    if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) ==
-        MCDisassembler::Fail) {
-      return MCDisassembler::Fail;
-    }
-    return MCDisassembler::Success;
+    return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
 
   case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
     using namespace amdhsa;
index e987778..a942762 100644 (file)
@@ -156,6 +156,13 @@ public:
   DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,
                                        raw_string_ostream &KdStream) const;
 
+  /// Decode as directives that handle COMPUTE_PGM_RSRC3.
+  /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC3.
+  /// \param KdStream       - Stream to write the disassembled directives to.
+  // NOLINTNEXTLINE(readability-identifier-naming)
+  DecodeStatus decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer,
+                                       raw_string_ostream &KdStream) const;
+
   DecodeStatus convertEXPInst(MCInst &MI) const;
   DecodeStatus convertVINTERPInst(MCInst &MI) const;
   DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
index 825a6cf..a7af65a 100644 (file)
@@ -1,26 +1,52 @@
 ;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor.
 
-; RUN: split-file %s %t.dir
-
-; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
-; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +7 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble
-; RUN: diff %t1 %t1-re-assemble
-
-; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
-; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +7 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble
-; RUN: diff %t2 %t2-re-assemble
-
-; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3
-; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +7 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble
-; RUN: diff %t3 %t3-re-assemble
-
+; RUN: split-file %s %t && cd %t
 
 ;--- 1.s
 ;; Only set next_free_sgpr.
-.amdhsa_kernel my_kernel_1
+; RUN: %assemble -mcpu=gfx908 <1.s >1.o
+; RUN: %disassemble_kd 1.o | %tee_kd 1-disasm.s | FileCheck 1.s
+; RUN: %assemble -mcpu=gfx908 <1-disasm.s >1-disasm.o
+; RUN: diff 1.o 1-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 4
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 48
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
   .amdhsa_next_free_vgpr 0
   .amdhsa_next_free_sgpr 42
   .amdhsa_reserve_flat_scratch 0
 
 ;--- 2.s
 ;; Only set other directives.
-.amdhsa_kernel my_kernel_2
+; RUN: %assemble -mcpu=gfx908 <2.s >2.o
+; RUN: %disassemble_kd 2.o | %tee_kd 2-disasm.s | FileCheck 2.s
+; RUN: %assemble -mcpu=gfx908 <2-disasm.s >2-disasm.o
+; RUN: diff 2.o 2-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 4
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
   .amdhsa_next_free_vgpr 0
   .amdhsa_next_free_sgpr 0
   .amdhsa_reserve_flat_scratch 1
 
 ;--- 3.s
 ;; Set all affecting directives.
-.amdhsa_kernel my_kernel_3
+; RUN: %assemble -mcpu=gfx908 <3.s >3.o
+; RUN: %disassemble_kd 3.o | %tee_kd 3-disasm.s | FileCheck 3.s
+; RUN: %assemble -mcpu=gfx908 <3-disasm.s >3-disasm.o
+; RUN: diff 3.o 3-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 4
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 48
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
   .amdhsa_next_free_vgpr 0
   .amdhsa_next_free_sgpr 35
   .amdhsa_reserve_flat_scratch 1
index 52981b6..e08aeff 100644 (file)
 ;; Test disassembly for GRANULATED_WORKITEM_VGPR_COUNT in the kernel descriptor.
 
-; RUN: split-file %s %t.dir
-
-; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
-; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +7 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble
-; RUN: diff %t1 %t1-re-assemble
-
-; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
-; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +7 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble
-; RUN: diff %t2 %t2-re-assemble
-
-; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3
-; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +7 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble
-; RUN: diff %t3 %t3-re-assemble
+; RUN: split-file %s %t && cd %t
 
 ;--- 1.s
-.amdhsa_kernel my_kernel_1
+; RUN: %assemble -mcpu=gfx908 <1.s >1.o
+; RUN: %disassemble_kd 1.o | %tee_kd 1-disasm.s | FileCheck 1.s
+; RUN: %assemble -mcpu=gfx908 <1-disasm.s >1-disasm.o
+; RUN: diff 1.o 1-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 24
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
   .amdhsa_next_free_vgpr 23
   .amdhsa_next_free_sgpr 0
 .end_amdhsa_kernel
 
 ;--- 2.s
-.amdhsa_kernel my_kernel_2
+; RUN: %assemble -mcpu=gfx908 <2.s >2.o
+; RUN: %disassemble_kd 2.o | %tee_kd 2-disasm.s | FileCheck 2.s
+; RUN: %assemble -mcpu=gfx908 <2-disasm.s >2-disasm.o
+; RUN: diff 2.o 2-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 16
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
   .amdhsa_next_free_vgpr 14
   .amdhsa_next_free_sgpr 0
 .end_amdhsa_kernel
 
 ;--- 3.s
-.amdhsa_kernel my_kernel_3
+; RUN: %assemble -mcpu=gfx908 <3.s >3.o
+; RUN: %disassemble_kd 3.o | %tee_kd 3-disasm.s | FileCheck 3.s
+; RUN: %assemble -mcpu=gfx908 <3-disasm.s >3-disasm.o
+; RUN: diff 3.o 3-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
   .amdhsa_next_free_vgpr 32
   .amdhsa_next_free_sgpr 0
 .end_amdhsa_kernel
index 2a665f0..bb818a2 100644 (file)
@@ -1,2 +1,8 @@
 if not 'AMDGPU' in config.root.targets:
     config.unsupported = True
+
+config.substitutions.append(('%disassemble_kd', 'llvm-objdump --disassemble-symbols=kernel.kd'))
+config.substitutions.append(('%tee_kd', 'tail -n +7 | tee'))
+config.substitutions.append(('%assemble_wave32', 'llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize32,-wavefrontsize64 -filetype=obj'))
+config.substitutions.append(('%assemble_wave64', 'llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj'))
+config.substitutions.append(('%assemble', 'llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj'))