From f5831514612cd9e014e4fc7455b75411531fe6e1 Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Tue, 20 Sep 2022 20:05:19 +0000 Subject: [PATCH] [NFC][AMDGPU] Refactor AMDGPUDisassembler Clean up ahead of a patch to fix bugs in the AMDGPUDisassembler. Use lit.local.cfg substitutions and more idiomatic use of split-file to simplify and extend existing kernel-descriptor disassembly tests. Add a comment to AMDHSAKernelDescriptor.h, as at least one small step towards keeping all kernel-descriptor sensitive code in sync. Reviewed By: kzhuravl, arsenm Differential Revision: https://reviews.llvm.org/D130105 --- llvm/include/llvm/Support/AMDHSAKernelDescriptor.h | 7 + .../AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 40 +++--- .../AMDGPU/Disassembler/AMDGPUDisassembler.h | 7 + llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s | 150 ++++++++++++++++++--- llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s | 149 +++++++++++++++++--- .../tools/llvm-objdump/ELF/AMDGPU/lit.local.cfg | 6 + 6 files changed, 297 insertions(+), 62 deletions(-) diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h index 61b0574..f56f231 100644 --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -9,6 +9,13 @@ /// \file /// AMDHSA kernel descriptor definitions. 
For more information, visit /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor +/// +/// \warning +/// Any changes to this file should also be audited for corresponding changes +/// needed in both the assembler and disassembler, namely: +/// * AMDGPUAsmPrinter.{cpp,h} +/// * AMDGPUTargetStreamer.{cpp,h} +/// * AMDGPUDisassembler.{cpp,h} // //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9dec1fe..f4a5a38 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -1768,10 +1768,10 @@ bool AMDGPUDisassembler::hasArchitectedFlatScratch() const { //===----------------------------------------------------------------------===// // AMDGPU specific symbol handling //===----------------------------------------------------------------------===// +#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK)) #define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ do { \ - KdStream << Indent << DIRECTIVE " " \ - << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \ } while (0) // NOLINTNEXTLINE(readability-identifier-naming) @@ -1786,8 +1786,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( // simply calculate the inverse of what the assembler does. 
uint32_t GranulatedWorkitemVGPRCount = - (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >> - COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT; + GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT); uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); @@ -1814,8 +1813,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( // The disassembler cannot recover the original values of those 3 directives. uint32_t GranulatedWavefrontSGPRCount = - (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >> - COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT; + GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT); if (isGFX10Plus() && GranulatedWavefrontSGPRCount) return MCDisassembler::Fail; @@ -1925,7 +1923,17 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( return MCDisassembler::Success; } +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + if (!isGFX10Plus() && FourByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; +} #undef PRINT_DIRECTIVE +#undef GET_FIELD MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptorDirective( @@ -1993,30 +2001,16 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( return MCDisassembler::Success; case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: - // COMPUTE_PGM_RSRC3 - // - Only set for GFX10, GFX6-9 have this to be 0. - // - Currently no directives directly control this. 
FourByteBuffer = DE.getU32(Cursor); - if (!isGFX10Plus() && FourByteBuffer) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; + return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream); case amdhsa::COMPUTE_PGM_RSRC1_OFFSET: FourByteBuffer = DE.getU32(Cursor); - if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) == - MCDisassembler::Fail) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; + return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream); case amdhsa::COMPUTE_PGM_RSRC2_OFFSET: FourByteBuffer = DE.getU32(Cursor); - if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) == - MCDisassembler::Fail) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; + return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream); case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: using namespace amdhsa; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index e987778d..a942762 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -156,6 +156,13 @@ public: DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const; + /// Decode as directives that handle COMPUTE_PGM_RSRC3. + /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC3. + /// \param KdStream - Stream to write the disassembled directives to. 
+ // NOLINTNEXTLINE(readability-identifier-naming) + DecodeStatus decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, + raw_string_ostream &KdStream) const; + DecodeStatus convertEXPInst(MCInst &MI) const; DecodeStatus convertVINTERPInst(MCInst &MI) const; DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const; diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s index 825a6cf..a7af65a 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s @@ -1,26 +1,52 @@ ;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor. -; RUN: split-file %s %t.dir - -; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +7 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble -; RUN: diff %t1 %t1-re-assemble - -; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +7 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble -; RUN: diff %t2 %t2-re-assemble - -; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +7 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble -; RUN: diff %t3 %t3-re-assemble - +; RUN: split-file %s %t && cd %t ;--- 1.s ;; Only set next_free_sgpr. 
-.amdhsa_kernel my_kernel_1 +; RUN: %assemble -mcpu=gfx908 <1.s >1.o +; RUN: %disassemble_kd 1.o | %tee_kd 1-disasm.s | FileCheck 1.s +; RUN: %assemble -mcpu=gfx908 <1-disasm.s >1-disasm.o +; RUN: diff 1.o 1-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 4 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 48 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 +; 
CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel .amdhsa_next_free_vgpr 0 .amdhsa_next_free_sgpr 42 .amdhsa_reserve_flat_scratch 0 @@ -30,7 +56,49 @@ ;--- 2.s ;; Only set other directives. -.amdhsa_kernel my_kernel_2 +; RUN: %assemble -mcpu=gfx908 <2.s >2.o +; RUN: %disassemble_kd 2.o | %tee_kd 2-disasm.s | FileCheck 2.s +; RUN: %assemble -mcpu=gfx908 <2-disasm.s >2-disasm.o +; RUN: diff 2.o 2-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 4 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: 
.amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel .amdhsa_next_free_vgpr 0 .amdhsa_next_free_sgpr 0 .amdhsa_reserve_flat_scratch 1 @@ -40,7 +108,49 @@ ;--- 3.s ;; Set all affecting directives. -.amdhsa_kernel my_kernel_3 +; RUN: %assemble -mcpu=gfx908 <3.s >3.o +; RUN: %disassemble_kd 3.o | %tee_kd 3-disasm.s | FileCheck 3.s +; RUN: %assemble -mcpu=gfx908 <3-disasm.s >3-disasm.o +; RUN: diff 3.o 3-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 4 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 48 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: 
.amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel .amdhsa_next_free_vgpr 0 .amdhsa_next_free_sgpr 35 .amdhsa_reserve_flat_scratch 1 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s index 52981b6..e08aeff 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s @@ -1,36 +1,147 @@ ;; Test disassembly for GRANULATED_WORKITEM_VGPR_COUNT in the kernel descriptor. -; RUN: split-file %s %t.dir - -; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +7 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble -; RUN: diff %t1 %t1-re-assemble - -; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +7 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble -; RUN: diff %t2 %t2-re-assemble - -; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +7 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble -; RUN: diff %t3 %t3-re-assemble +; RUN: split-file %s %t && cd %t ;--- 1.s -.amdhsa_kernel my_kernel_1 +; RUN: %assemble -mcpu=gfx908 <1.s >1.o +; RUN: 
%disassemble_kd 1.o | %tee_kd 1-disasm.s | FileCheck 1.s +; RUN: %assemble -mcpu=gfx908 <1-disasm.s >1-disasm.o +; RUN: diff 1.o 1-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 24 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel .amdhsa_next_free_vgpr 23 
.amdhsa_next_free_sgpr 0 .end_amdhsa_kernel ;--- 2.s -.amdhsa_kernel my_kernel_2 +; RUN: %assemble -mcpu=gfx908 <2.s >2.o +; RUN: %disassemble_kd 2.o | %tee_kd 2-disasm.s | FileCheck 2.s +; RUN: %assemble -mcpu=gfx908 <2-disasm.s >2-disasm.o +; RUN: diff 2.o 2-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 16 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; 
CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel .amdhsa_next_free_vgpr 14 .amdhsa_next_free_sgpr 0 .end_amdhsa_kernel ;--- 3.s -.amdhsa_kernel my_kernel_3 +; RUN: %assemble -mcpu=gfx908 <3.s >3.o +; RUN: %disassemble_kd 3.o | %tee_kd 3-disasm.s | FileCheck 3.s +; RUN: %assemble -mcpu=gfx908 <3-disasm.s >3-disasm.o +; RUN: diff 3.o 3-disasm.o +; CHECK: .amdhsa_kernel kernel +; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 +; CHECK-NEXT: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_next_free_vgpr 32 +; CHECK-NEXT: .amdhsa_reserve_vcc 0 +; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0 +; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0 +; CHECK-NEXT: .amdhsa_next_free_sgpr 8 +; CHECK-NEXT: .amdhsa_float_round_mode_32 0 +; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0 +; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3 +; CHECK-NEXT: .amdhsa_dx10_clamp 1 +; CHECK-NEXT: .amdhsa_ieee_mode 1 +; CHECK-NEXT: .amdhsa_fp16_overflow 0 +; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +; CHECK-NEXT: .amdhsa_exception_int_div_zero 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; CHECK-NEXT: 
.amdhsa_user_sgpr_dispatch_id 0 +; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0 +; CHECK-NEXT: .end_amdhsa_kernel +.amdhsa_kernel kernel .amdhsa_next_free_vgpr 32 .amdhsa_next_free_sgpr 0 .end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/lit.local.cfg b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/lit.local.cfg index 2a665f0..bb818a2 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/lit.local.cfg +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/lit.local.cfg @@ -1,2 +1,8 @@ if not 'AMDGPU' in config.root.targets: config.unsupported = True + +config.substitutions.append(('%disassemble_kd', 'llvm-objdump --disassemble-symbols=kernel.kd')) +config.substitutions.append(('%tee_kd', 'tail -n +7 | tee')) +config.substitutions.append(('%assemble_wave32', 'llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize32,-wavefrontsize64 -filetype=obj')) +config.substitutions.append(('%assemble_wave64', 'llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj')) +config.substitutions.append(('%assemble', 'llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj')) -- 2.7.4