Clean up ahead of a patch to fix bugs in the AMDGPUDisassembler.
Use lit.local.cfg substitutions and more idiomatic use of split-file to
simplify and extend existing kernel-descriptor disassembly tests.
Add a comment to AMDHSAKernelDescriptor.h, as at least one small set
towards keeping all kernel-descriptor sensitive code in sync.
Reviewed By: kzhuravl, arsenm
Differential Revision: https://reviews.llvm.org/D130105
/// \file
/// AMDHSA kernel descriptor definitions. For more information, visit
/// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
+///
+/// \warning
+/// Any changes to this file should also be audited for corresponding changes
+/// needed in both the assembler and disassembler, namely:
+/// * AMDGPUAsmPrinter.{cpp,h}
+/// * AMDGPUTargetStreamer.{cpp,h}
+/// * AMDGPUDisassembler.{cpp,h}
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
+#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
do { \
- KdStream << Indent << DIRECTIVE " " \
- << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
+ KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
} while (0)
// NOLINTNEXTLINE(readability-identifier-naming)
// simply calculate the inverse of what the assembler does.
uint32_t GranulatedWorkitemVGPRCount =
- (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
- COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;
+ GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *
AMDGPU::IsaInfo::getVGPREncodingGranule(&STI);
// The disassembler cannot recover the original values of those 3 directives.
uint32_t GranulatedWavefrontSGPRCount =
- (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
- COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;
+ GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
+// NOLINTNEXTLINE(readability-identifier-naming)
+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
+ uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
+ using namespace amdhsa;
+ if (!isGFX10Plus() && FourByteBuffer) {
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+}
#undef PRINT_DIRECTIVE
+#undef GET_FIELD
MCDisassembler::DecodeStatus
AMDGPUDisassembler::decodeKernelDescriptorDirective(
return MCDisassembler::Success;
case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
- // COMPUTE_PGM_RSRC3
- // - Only set for GFX10, GFX6-9 have this to be 0.
- // - Currently no directives directly control this.
FourByteBuffer = DE.getU32(Cursor);
- if (!isGFX10Plus() && FourByteBuffer) {
- return MCDisassembler::Fail;
- }
- return MCDisassembler::Success;
+ return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
FourByteBuffer = DE.getU32(Cursor);
- if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) ==
- MCDisassembler::Fail) {
- return MCDisassembler::Fail;
- }
- return MCDisassembler::Success;
+ return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
FourByteBuffer = DE.getU32(Cursor);
- if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) ==
- MCDisassembler::Fail) {
- return MCDisassembler::Fail;
- }
- return MCDisassembler::Success;
+ return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
using namespace amdhsa;
DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,
raw_string_ostream &KdStream) const;
+ /// Decode as directives that handle COMPUTE_PGM_RSRC3.
+ /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC3.
+ /// \param KdStream - Stream to write the disassembled directives to.
+ // NOLINTNEXTLINE(readability-identifier-naming)
+ DecodeStatus decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer,
+ raw_string_ostream &KdStream) const;
+
DecodeStatus convertEXPInst(MCInst &MI) const;
DecodeStatus convertVINTERPInst(MCInst &MI) const;
DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor.
-; RUN: split-file %s %t.dir
-
-; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
-; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +7 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble
-; RUN: diff %t1 %t1-re-assemble
-
-; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
-; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +7 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble
-; RUN: diff %t2 %t2-re-assemble
-
-; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3
-; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +7 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble
-; RUN: diff %t3 %t3-re-assemble
-
+; RUN: split-file %s %t && cd %t
;--- 1.s
;; Only set next_free_sgpr.
-.amdhsa_kernel my_kernel_1
+; RUN: %assemble -mcpu=gfx908 <1.s >1.o
+; RUN: %disassemble_kd 1.o | %tee_kd 1-disasm.s | FileCheck 1.s
+; RUN: %assemble -mcpu=gfx908 <1-disasm.s >1-disasm.o
+; RUN: diff 1.o 1-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 4
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 48
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 42
.amdhsa_reserve_flat_scratch 0
;--- 2.s
;; Only set other directives.
-.amdhsa_kernel my_kernel_2
+; RUN: %assemble -mcpu=gfx908 <2.s >2.o
+; RUN: %disassemble_kd 2.o | %tee_kd 2-disasm.s | FileCheck 2.s
+; RUN: %assemble -mcpu=gfx908 <2-disasm.s >2-disasm.o
+; RUN: diff 2.o 2-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 4
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.amdhsa_reserve_flat_scratch 1
;--- 3.s
;; Set all affecting directives.
-.amdhsa_kernel my_kernel_3
+; RUN: %assemble -mcpu=gfx908 <3.s >3.o
+; RUN: %disassemble_kd 3.o | %tee_kd 3-disasm.s | FileCheck 3.s
+; RUN: %assemble -mcpu=gfx908 <3-disasm.s >3-disasm.o
+; RUN: diff 3.o 3-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 4
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 48
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 35
.amdhsa_reserve_flat_scratch 1
;; Test disassembly for GRANULATED_WORKITEM_VGPR_COUNT in the kernel descriptor.
-; RUN: split-file %s %t.dir
-
-; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
-; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +7 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble
-; RUN: diff %t1 %t1-re-assemble
-
-; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
-; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +7 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble
-; RUN: diff %t2 %t2-re-assemble
-
-; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3
-; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +7 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble
-; RUN: diff %t3 %t3-re-assemble
+; RUN: split-file %s %t && cd %t
;--- 1.s
-.amdhsa_kernel my_kernel_1
+; RUN: %assemble -mcpu=gfx908 <1.s >1.o
+; RUN: %disassemble_kd 1.o | %tee_kd 1-disasm.s | FileCheck 1.s
+; RUN: %assemble -mcpu=gfx908 <1-disasm.s >1-disasm.o
+; RUN: diff 1.o 1-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 24
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 23
.amdhsa_next_free_sgpr 0
.end_amdhsa_kernel
;--- 2.s
-.amdhsa_kernel my_kernel_2
+; RUN: %assemble -mcpu=gfx908 <2.s >2.o
+; RUN: %disassemble_kd 2.o | %tee_kd 2-disasm.s | FileCheck 2.s
+; RUN: %assemble -mcpu=gfx908 <2-disasm.s >2-disasm.o
+; RUN: diff 2.o 2-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 16
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 14
.amdhsa_next_free_sgpr 0
.end_amdhsa_kernel
;--- 3.s
-.amdhsa_kernel my_kernel_3
+; RUN: %assemble -mcpu=gfx908 <3.s >3.o
+; RUN: %disassemble_kd 3.o | %tee_kd 3-disasm.s | FileCheck 3.s
+; RUN: %assemble -mcpu=gfx908 <3-disasm.s >3-disasm.o
+; RUN: diff 3.o 3-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
.amdhsa_next_free_sgpr 0
.end_amdhsa_kernel
if not 'AMDGPU' in config.root.targets:
config.unsupported = True
+
+config.substitutions.append(('%disassemble_kd', 'llvm-objdump --disassemble-symbols=kernel.kd'))
+config.substitutions.append(('%tee_kd', 'tail -n +7 | tee'))
+config.substitutions.append(('%assemble_wave32', 'llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize32,-wavefrontsize64 -filetype=obj'))
+config.substitutions.append(('%assemble_wave64', 'llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj'))
+config.substitutions.append(('%assemble', 'llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj'))