From: Sam Kolton Date: Fri, 9 Sep 2016 10:08:02 +0000 (+0000) Subject: [AMDGPU] Assembler: rename amd_kernel_code_t asm names according to spec X-Git-Tag: llvmorg-4.0.0-rc1~10243 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a2e5c88bafa0454455266b58b96e879a91cefe15;p=platform%2Fupstream%2Fllvm.git [AMDGPU] Assembler: rename amd_kernel_code_t asm names according to spec Summary: Also removed duplicate code from AMDGPUTargetAsmStreamer. This change only changes how amd_kernel_code_t is parsed and printed. No variable names are changed. Reviewers: vpykhtin, tstellarAMD Subscribers: arsenm, wdng, nhaehnle Differential Revision: https://reviews.llvm.org/D24296 llvm-svn: 281028 --- diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 83dcaac..3f668f3 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -14,6 +14,7 @@ #include "AMDGPUTargetStreamer.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" +#include "Utils/AMDKernelCodeTUtils.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" @@ -56,169 +57,9 @@ AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { - uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32); - bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties & - AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); - bool EnableSGPRDispatchPtr = (Header.code_properties & - AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); - bool EnableSGPRQueuePtr = (Header.code_properties & - AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); - bool EnableSGPRKernargSegmentPtr = (Header.code_properties & - AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); - bool EnableSGPRDispatchID = (Header.code_properties & - 
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); - bool EnableSGPRFlatScratchInit = (Header.code_properties & - AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); - bool EnableSGPRPrivateSegmentSize = (Header.code_properties & - AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); - bool EnableSGPRGridWorkgroupCountX = (Header.code_properties & - AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X); - bool EnableSGPRGridWorkgroupCountY = (Header.code_properties & - AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y); - bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties & - AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z); - bool EnableOrderedAppendGDS = (Header.code_properties & - AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS); - uint32_t PrivateElementSize = (Header.code_properties & - AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >> - AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT; - bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64); - bool IsDynamicCallstack = (Header.code_properties & - AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK); - bool IsDebugEnabled = (Header.code_properties & - AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED); - bool IsXNackEnabled = (Header.code_properties & - AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED); - - OS << "\t.amd_kernel_code_t\n" << - "\t\tkernel_code_version_major = " << - Header.amd_kernel_code_version_major << '\n' << - "\t\tkernel_code_version_minor = " << - Header.amd_kernel_code_version_minor << '\n' << - "\t\tmachine_kind = " << - Header.amd_machine_kind << '\n' << - "\t\tmachine_version_major = " << - Header.amd_machine_version_major << '\n' << - "\t\tmachine_version_minor = " << - Header.amd_machine_version_minor << '\n' << - "\t\tmachine_version_stepping = " << - Header.amd_machine_version_stepping << '\n' << - "\t\tkernel_code_entry_byte_offset = " << - Header.kernel_code_entry_byte_offset << '\n' << - "\t\tkernel_code_prefetch_byte_size = " << - Header.kernel_code_prefetch_byte_size << '\n' << - 
"\t\tmax_scratch_backing_memory_byte_size = " << - Header.max_scratch_backing_memory_byte_size << '\n' << - "\t\tcompute_pgm_rsrc1_vgprs = " << - G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' << - "\t\tcompute_pgm_rsrc1_sgprs = " << - G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' << - "\t\tcompute_pgm_rsrc1_priority = " << - G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' << - "\t\tcompute_pgm_rsrc1_float_mode = " << - G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' << - "\t\tcompute_pgm_rsrc1_priv = " << - G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' << - "\t\tcompute_pgm_rsrc1_dx10_clamp = " << - G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' << - "\t\tcompute_pgm_rsrc1_debug_mode = " << - G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' << - "\t\tcompute_pgm_rsrc1_ieee_mode = " << - G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' << - "\t\tcompute_pgm_rsrc2_scratch_en = " << - G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' << - "\t\tcompute_pgm_rsrc2_user_sgpr = " << - G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' << - "\t\tcompute_pgm_rsrc2_tgid_x_en = " << - G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' << - "\t\tcompute_pgm_rsrc2_tgid_y_en = " << - G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' << - "\t\tcompute_pgm_rsrc2_tgid_z_en = " << - G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' << - "\t\tcompute_pgm_rsrc2_tg_size_en = " << - G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' << - "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " << - G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' << - "\t\tcompute_pgm_rsrc2_excp_en_msb = " << - G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' << - "\t\tcompute_pgm_rsrc2_lds_size = " << - G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' << - "\t\tcompute_pgm_rsrc2_excp_en = " << - G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' << - - "\t\tenable_sgpr_private_segment_buffer = " << - EnableSGPRPrivateSegmentBuffer << 
'\n' << - "\t\tenable_sgpr_dispatch_ptr = " << - EnableSGPRDispatchPtr << '\n' << - "\t\tenable_sgpr_queue_ptr = " << - EnableSGPRQueuePtr << '\n' << - "\t\tenable_sgpr_kernarg_segment_ptr = " << - EnableSGPRKernargSegmentPtr << '\n' << - "\t\tenable_sgpr_dispatch_id = " << - EnableSGPRDispatchID << '\n' << - "\t\tenable_sgpr_flat_scratch_init = " << - EnableSGPRFlatScratchInit << '\n' << - "\t\tenable_sgpr_private_segment_size = " << - EnableSGPRPrivateSegmentSize << '\n' << - "\t\tenable_sgpr_grid_workgroup_count_x = " << - EnableSGPRGridWorkgroupCountX << '\n' << - "\t\tenable_sgpr_grid_workgroup_count_y = " << - EnableSGPRGridWorkgroupCountY << '\n' << - "\t\tenable_sgpr_grid_workgroup_count_z = " << - EnableSGPRGridWorkgroupCountZ << '\n' << - "\t\tenable_ordered_append_gds = " << - EnableOrderedAppendGDS << '\n' << - "\t\tprivate_element_size = " << - PrivateElementSize << '\n' << - "\t\tis_ptr64 = " << - IsPtr64 << '\n' << - "\t\tis_dynamic_callstack = " << - IsDynamicCallstack << '\n' << - "\t\tis_debug_enabled = " << - IsDebugEnabled << '\n' << - "\t\tis_xnack_enabled = " << - IsXNackEnabled << '\n' << - "\t\tworkitem_private_segment_byte_size = " << - Header.workitem_private_segment_byte_size << '\n' << - "\t\tworkgroup_group_segment_byte_size = " << - Header.workgroup_group_segment_byte_size << '\n' << - "\t\tgds_segment_byte_size = " << - Header.gds_segment_byte_size << '\n' << - "\t\tkernarg_segment_byte_size = " << - Header.kernarg_segment_byte_size << '\n' << - "\t\tworkgroup_fbarrier_count = " << - Header.workgroup_fbarrier_count << '\n' << - "\t\twavefront_sgpr_count = " << - Header.wavefront_sgpr_count << '\n' << - "\t\tworkitem_vgpr_count = " << - Header.workitem_vgpr_count << '\n' << - "\t\treserved_vgpr_first = " << - Header.reserved_vgpr_first << '\n' << - "\t\treserved_vgpr_count = " << - Header.reserved_vgpr_count << '\n' << - "\t\treserved_sgpr_first = " << - Header.reserved_sgpr_first << '\n' << - "\t\treserved_sgpr_count = " << - 
Header.reserved_sgpr_count << '\n' << - "\t\tdebug_wavefront_private_segment_offset_sgpr = " << - Header.debug_wavefront_private_segment_offset_sgpr << '\n' << - "\t\tdebug_private_segment_buffer_sgpr = " << - Header.debug_private_segment_buffer_sgpr << '\n' << - "\t\tkernarg_segment_alignment = " << - (uint32_t)Header.kernarg_segment_alignment << '\n' << - "\t\tgroup_segment_alignment = " << - (uint32_t)Header.group_segment_alignment << '\n' << - "\t\tprivate_segment_alignment = " << - (uint32_t)Header.private_segment_alignment << '\n' << - "\t\twavefront_size = " << - (uint32_t)Header.wavefront_size << '\n' << - "\t\tcall_convention = " << - Header.call_convention << '\n' << - "\t\truntime_loader_kernel_symbol = " << - Header.runtime_loader_kernel_symbol << '\n' << - // TODO: control_directives - "\t.end_amd_kernel_code_t\n"; - + OS << "\t.amd_kernel_code_t\n"; + dumpAmdKernelCode(&Header, OS, "\t\t"); + OS << "\t.end_amd_kernel_code_t\n"; } void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h index 3a5ff60..c55eaab 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h @@ -16,10 +16,10 @@ #define QNAME(name) amd_kernel_code_t::name #define FLD_T(name) decltype(QNAME(name)), &QNAME(name) -#define FIELD2(sname, name) \ - RECORD(sname, printField, parseField) +#define FIELD2(sname, aname, name) \ + RECORD(sname, aname, printField, parseField) -#define FIELD(name) FIELD2(name, name) +#define FIELD(name) FIELD2(name, name, name) #define PRINTCODEPROP(name) \ @@ -33,7 +33,7 @@ AMD_CODE_PROPERTY_##name##_WIDTH> #define CODEPROP(name, shift) \ - RECORD(name, PRINTCODEPROP(shift), PARSECODEPROP(shift)) + RECORD(name, name, PRINTCODEPROP(shift), PARSECODEPROP(shift)) // have to define these lambdas because of Set/GetMacro #define PRINTCOMP(GetMacro, Shift) \ @@ -50,32 +50,70 
@@ return true; \ } -#define COMPPGM(name, GetMacro, SetMacro, Shift) \ - RECORD(name, PRINTCOMP(GetMacro, Shift), PARSECOMP(SetMacro, Shift)) +#define COMPPGM(name, aname, GetMacro, SetMacro, Shift) \ + RECORD(name, aname, PRINTCOMP(GetMacro, Shift), PARSECOMP(SetMacro, Shift)) -#define COMPPGM1(name, AccMacro) \ - COMPPGM(compute_pgm_rsrc1_##name, \ - G_00B848_##AccMacro, S_00B848_##AccMacro, 0) +#define COMPPGM1(name, aname, AccMacro) \ + COMPPGM(name, aname, G_00B848_##AccMacro, S_00B848_##AccMacro, 0) -#define COMPPGM2(name, AccMacro) \ - COMPPGM(compute_pgm_rsrc2_##name, \ - G_00B84C_##AccMacro, S_00B84C_##AccMacro, 32) +#define COMPPGM2(name, aname, AccMacro) \ + COMPPGM(name, aname, G_00B84C_##AccMacro, S_00B84C_##AccMacro, 32) /////////////////////////////////////////////////////////////////////////////// // Begin of the table // Define RECORD(name, print, parse) in your code to get field definitions // and include this file -FIELD2(kernel_code_version_major, amd_kernel_code_version_major), -FIELD2(kernel_code_version_minor, amd_kernel_code_version_minor), -FIELD2(machine_kind, amd_machine_kind), -FIELD2(machine_version_major, amd_machine_version_major), -FIELD2(machine_version_minor, amd_machine_version_minor), -FIELD2(machine_version_stepping, amd_machine_version_stepping), +FIELD2(amd_code_version_major, kernel_code_version_major, amd_kernel_code_version_major), +FIELD2(amd_code_version_minor, kernel_code_version_minor, amd_kernel_code_version_minor), +FIELD2(amd_machine_kind, machine_kind, amd_machine_kind), +FIELD2(amd_machine_version_major, machine_version_major, amd_machine_version_major), +FIELD2(amd_machine_version_minor, machine_version_minor, amd_machine_version_minor), +FIELD2(amd_machine_version_stepping, machine_version_stepping, amd_machine_version_stepping), + FIELD(kernel_code_entry_byte_offset), FIELD(kernel_code_prefetch_byte_size), FIELD(max_scratch_backing_memory_byte_size), -FIELD(compute_pgm_resource_registers), + 
+COMPPGM1(granulated_workitem_vgpr_count, compute_pgm_rsrc1_vgprs, VGPRS), +COMPPGM1(granulated_wavefront_sgpr_count, compute_pgm_rsrc1_sgprs, SGPRS), +COMPPGM1(priority, compute_pgm_rsrc1_priority, PRIORITY), +COMPPGM1(float_mode, compute_pgm_rsrc1_float_mode, FLOAT_MODE), // TODO: split float_mode +COMPPGM1(priv, compute_pgm_rsrc1_priv, PRIV), +COMPPGM1(enable_dx10_clamp, compute_pgm_rsrc1_dx10_clamp, DX10_CLAMP), +COMPPGM1(debug_mode, compute_pgm_rsrc1_debug_mode, DEBUG_MODE), +COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE_MODE), +// TODO: bulky +// TODO: cdbg_user +COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN), +COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR), +// TODO: enable_trap_handler +COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN), +COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN), +COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN), +COMPPGM2(enable_sgpr_workgroup_info, compute_pgm_rsrc2_tg_size_en, TG_SIZE_EN), +COMPPGM2(enable_vgpr_workitem_id, compute_pgm_rsrc2_tidig_comp_cnt, TIDIG_COMP_CNT), +COMPPGM2(enable_exception_msb, compute_pgm_rsrc2_excp_en_msb, EXCP_EN_MSB), // TODO: split enable_exception_msb +COMPPGM2(granulated_lds_size, compute_pgm_rsrc2_lds_size, LDS_SIZE), +COMPPGM2(enable_exception, compute_pgm_rsrc2_excp_en, EXCP_EN), // TODO: split enable_exception + +CODEPROP(enable_sgpr_private_segment_buffer, ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER), +CODEPROP(enable_sgpr_dispatch_ptr, ENABLE_SGPR_DISPATCH_PTR), +CODEPROP(enable_sgpr_queue_ptr, ENABLE_SGPR_QUEUE_PTR), +CODEPROP(enable_sgpr_kernarg_segment_ptr, ENABLE_SGPR_KERNARG_SEGMENT_PTR), +CODEPROP(enable_sgpr_dispatch_id, ENABLE_SGPR_DISPATCH_ID), +CODEPROP(enable_sgpr_flat_scratch_init, ENABLE_SGPR_FLAT_SCRATCH_INIT), +CODEPROP(enable_sgpr_private_segment_size, ENABLE_SGPR_PRIVATE_SEGMENT_SIZE), 
+CODEPROP(enable_sgpr_grid_workgroup_count_x, ENABLE_SGPR_GRID_WORKGROUP_COUNT_X), +CODEPROP(enable_sgpr_grid_workgroup_count_y, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y), +CODEPROP(enable_sgpr_grid_workgroup_count_z, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z), +CODEPROP(enable_ordered_append_gds, ENABLE_ORDERED_APPEND_GDS), +CODEPROP(private_element_size, PRIVATE_ELEMENT_SIZE), +CODEPROP(is_ptr64, IS_PTR64), +CODEPROP(is_dynamic_callstack, IS_DYNAMIC_CALLSTACK), +CODEPROP(is_debug_enabled, IS_DEBUG_SUPPORTED), +CODEPROP(is_xnack_enabled, IS_XNACK_SUPPORTED), + FIELD(workitem_private_segment_byte_size), FIELD(workgroup_group_segment_byte_size), FIELD(gds_segment_byte_size), @@ -94,59 +132,8 @@ FIELD(group_segment_alignment), FIELD(private_segment_alignment), FIELD(wavefront_size), FIELD(call_convention), -FIELD(runtime_loader_kernel_symbol), - -COMPPGM1(vgprs, VGPRS), -COMPPGM1(sgprs, SGPRS), -COMPPGM1(priority, PRIORITY), -COMPPGM1(float_mode, FLOAT_MODE), -COMPPGM1(priv, PRIV), -COMPPGM1(dx10_clamp, DX10_CLAMP), -COMPPGM1(debug_mode, DEBUG_MODE), -COMPPGM1(ieee_mode, IEEE_MODE), -COMPPGM2(scratch_en, SCRATCH_EN), -COMPPGM2(user_sgpr, USER_SGPR), -COMPPGM2(tgid_x_en, TGID_X_EN), -COMPPGM2(tgid_y_en, TGID_Y_EN), -COMPPGM2(tgid_z_en, TGID_Z_EN), -COMPPGM2(tg_size_en, TG_SIZE_EN), -COMPPGM2(tidig_comp_cnt, TIDIG_COMP_CNT), -COMPPGM2(excp_en_msb, EXCP_EN_MSB), -COMPPGM2(lds_size, LDS_SIZE), -COMPPGM2(excp_en, EXCP_EN), - -CODEPROP(enable_sgpr_private_segment_buffer, - ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER), -CODEPROP(enable_sgpr_dispatch_ptr, - ENABLE_SGPR_DISPATCH_PTR), -CODEPROP(enable_sgpr_queue_ptr, - ENABLE_SGPR_QUEUE_PTR), -CODEPROP(enable_sgpr_kernarg_segment_ptr, - ENABLE_SGPR_KERNARG_SEGMENT_PTR), -CODEPROP(enable_sgpr_dispatch_id, - ENABLE_SGPR_DISPATCH_ID), -CODEPROP(enable_sgpr_flat_scratch_init, - ENABLE_SGPR_FLAT_SCRATCH_INIT), -CODEPROP(enable_sgpr_private_segment_size, - ENABLE_SGPR_PRIVATE_SEGMENT_SIZE), -CODEPROP(enable_sgpr_grid_workgroup_count_x, - 
ENABLE_SGPR_GRID_WORKGROUP_COUNT_X), -CODEPROP(enable_sgpr_grid_workgroup_count_y, - ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y), -CODEPROP(enable_sgpr_grid_workgroup_count_z, - ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z), -CODEPROP(enable_ordered_append_gds, - ENABLE_ORDERED_APPEND_GDS), -CODEPROP(private_element_size, - PRIVATE_ELEMENT_SIZE), -CODEPROP(is_ptr64, - IS_PTR64), -CODEPROP(is_dynamic_callstack, - IS_DYNAMIC_CALLSTACK), -CODEPROP(is_debug_enabled, - IS_DEBUG_SUPPORTED), -CODEPROP(is_xnack_enabled, - IS_XNACK_SUPPORTED) +FIELD(runtime_loader_kernel_symbol) +// TODO: control_directive // end of the table /////////////////////////////////////////////////////////////////////////////// diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp index f64973a..0333b0a 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp @@ -24,22 +24,37 @@ using namespace llvm; static ArrayRef get_amd_kernel_code_t_FldNames() { static StringRef const Table[] = { "", // not found placeholder -#define RECORD(name, print, parse) #name +#define RECORD(name, altName, print, parse) #name #include "AMDKernelCodeTInfo.h" #undef RECORD }; return makeArrayRef(Table); } -static StringMap createIndexMap(const ArrayRef &a) { +static ArrayRef get_amd_kernel_code_t_FldAltNames() { + static StringRef const Table[] = { + "", // not found placeholder +#define RECORD(name, altName, print, parse) #altName +#include "AMDKernelCodeTInfo.h" +#undef RECORD + }; + return makeArrayRef(Table); +} + +static StringMap createIndexMap(const ArrayRef &names, + const ArrayRef &altNames) { StringMap map; - for (auto Name : a) - map.insert(std::make_pair(Name, map.size())); + assert(names.size() == altNames.size()); + for (unsigned i = 0; i < names.size(); ++i) { + map.insert(std::make_pair(names[i], i)); + map.insert(std::make_pair(altNames[i], i)); + } return map; } static int 
get_amd_kernel_code_t_FieldIndex(StringRef name) { - static const auto map = createIndexMap(get_amd_kernel_code_t_FldNames()); + static const auto map = createIndexMap(get_amd_kernel_code_t_FldNames(), + get_amd_kernel_code_t_FldAltNames()); return map.lookup(name) - 1; // returns -1 if not found } @@ -73,7 +88,7 @@ typedef void(*PrintFx)(StringRef, static ArrayRef getPrinterTable() { static const PrintFx Table[] = { -#define RECORD(name, print, parse) print +#define RECORD(name, altName, print, parse) print #include "AMDKernelCodeTInfo.h" #undef RECORD }; @@ -145,7 +160,7 @@ typedef bool(*ParseFx)(amd_kernel_code_t &, static ArrayRef getParserTable() { static const ParseFx Table[] = { -#define RECORD(name, print, parse) parse +#define RECORD(name, altName, print, parse) parse #include "AMDKernelCodeTInfo.h" #undef RECORD }; diff --git a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll index 36aa677..98a3f17 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll @@ -1,9 +1,9 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_default_ci: -; GCN: compute_pgm_rsrc1_float_mode = 192 -; GCN: compute_pgm_rsrc1_dx10_clamp = 1 -; GCN: compute_pgm_rsrc1_ieee_mode = 0 +; GCN: float_mode = 192 +; GCN: enable_dx10_clamp = 1 +; GCN: enable_ieee_mode = 0 define void @test_default_ci(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 { store float 0.0, float addrspace(1)* %out0 store double 0.0, double addrspace(1)* %out1 @@ -11,9 +11,9 @@ define void @test_default_ci(float addrspace(1)* %out0, double addrspace(1)* %ou } ; GCN-LABEL: {{^}}test_default_vi: -; GCN: compute_pgm_rsrc1_float_mode = 192 -; GCN: compute_pgm_rsrc1_dx10_clamp = 1 -; GCN: compute_pgm_rsrc1_ieee_mode = 0 +; GCN: float_mode = 192 +; GCN: enable_dx10_clamp = 1 +; GCN: enable_ieee_mode = 0 define void @test_default_vi(float addrspace(1)* %out0, double 
addrspace(1)* %out1) #1 { store float 0.0, float addrspace(1)* %out0 store double 0.0, double addrspace(1)* %out1 @@ -21,9 +21,9 @@ define void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %ou } ; GCN-LABEL: {{^}}test_f64_denormals: -; GCN: compute_pgm_rsrc1_float_mode = 192 -; GCN: compute_pgm_rsrc1_dx10_clamp = 1 -; GCN: compute_pgm_rsrc1_ieee_mode = 0 +; GCN: float_mode = 192 +; GCN: enable_dx10_clamp = 1 +; GCN: enable_ieee_mode = 0 define void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 { store float 0.0, float addrspace(1)* %out0 store double 0.0, double addrspace(1)* %out1 @@ -31,9 +31,9 @@ define void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* } ; GCN-LABEL: {{^}}test_f32_denormals: -; GCN: compute_pgm_rsrc1_float_mode = 48 -; GCN: compute_pgm_rsrc1_dx10_clamp = 1 -; GCN: compute_pgm_rsrc1_ieee_mode = 0 +; GCN: float_mode = 48 +; GCN: enable_dx10_clamp = 1 +; GCN: enable_ieee_mode = 0 define void @test_f32_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #3 { store float 0.0, float addrspace(1)* %out0 store double 0.0, double addrspace(1)* %out1 @@ -41,9 +41,9 @@ define void @test_f32_denormals(float addrspace(1)* %out0, double addrspace(1)* } ; GCN-LABEL: {{^}}test_f32_f64_denormals: -; GCN: compute_pgm_rsrc1_float_mode = 240 -; GCN: compute_pgm_rsrc1_dx10_clamp = 1 -; GCN: compute_pgm_rsrc1_ieee_mode = 0 +; GCN: float_mode = 240 +; GCN: enable_dx10_clamp = 1 +; GCN: enable_ieee_mode = 0 define void @test_f32_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #4 { store float 0.0, float addrspace(1)* %out0 store double 0.0, double addrspace(1)* %out1 @@ -51,9 +51,9 @@ define void @test_f32_f64_denormals(float addrspace(1)* %out0, double addrspace( } ; GCN-LABEL: {{^}}test_no_denormals: -; GCN: compute_pgm_rsrc1_float_mode = 0 -; GCN: compute_pgm_rsrc1_dx10_clamp = 1 -; GCN: compute_pgm_rsrc1_ieee_mode = 0 +; GCN: float_mode = 0 +; GCN: 
enable_dx10_clamp = 1 +; GCN: enable_ieee_mode = 0 define void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #5 { store float 0.0, float addrspace(1)* %out0 store double 0.0, double addrspace(1)* %out1 diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll index 099f063..4f6dbf9 100644 --- a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll +++ b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll @@ -18,13 +18,13 @@ ; GCNHSA: .amd_kernel_code_t -; GCNHSA: compute_pgm_rsrc2_scratch_en = 1 -; GCNHSA: compute_pgm_rsrc2_user_sgpr = 8 -; GCNHSA: compute_pgm_rsrc2_tgid_x_en = 1 -; GCNHSA: compute_pgm_rsrc2_tgid_y_en = 0 -; GCNHSA: compute_pgm_rsrc2_tgid_z_en = 0 -; GCNHSA: compute_pgm_rsrc2_tg_size_en = 0 -; GCNHSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; GCNHSA: enable_sgpr_private_segment_wave_byte_offset = 1 +; GCNHSA: user_sgpr_count = 8 +; GCNHSA: enable_sgpr_workgroup_id_x = 1 +; GCNHSA: enable_sgpr_workgroup_id_y = 0 +; GCNHSA: enable_sgpr_workgroup_id_z = 0 +; GCNHSA: enable_sgpr_workgroup_info = 0 +; GCNHSA: enable_vgpr_workitem_id = 0 ; GCNHSA: enable_sgpr_private_segment_buffer = 1 ; GCNHSA: enable_sgpr_dispatch_ptr = 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll index c22eac7..75a9ec9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll @@ -10,12 +10,12 @@ declare i32 @llvm.amdgcn.workgroup.id.z() #0 ; ALL-LABEL {{^}}test_workgroup_id_x: ; HSA: .amd_kernel_code_t -; HSA: compute_pgm_rsrc2_user_sgpr = 6 -; HSA: compute_pgm_rsrc2_tgid_x_en = 1 -; HSA: compute_pgm_rsrc2_tgid_y_en = 0 -; HSA: compute_pgm_rsrc2_tgid_z_en = 0 -; HSA: compute_pgm_rsrc2_tg_size_en = 0 -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; HSA: user_sgpr_count = 6 +; HSA: enable_sgpr_workgroup_id_x = 1 +; HSA: enable_sgpr_workgroup_id_y = 0 +; HSA: 
enable_sgpr_workgroup_id_z = 0 +; HSA: enable_sgpr_workgroup_info = 0 +; HSA: enable_vgpr_workitem_id = 0 ; HSA: enable_sgpr_grid_workgroup_count_x = 0 ; HSA: enable_sgpr_grid_workgroup_count_y = 0 ; HSA: enable_sgpr_grid_workgroup_count_z = 0 @@ -40,11 +40,11 @@ define void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 { } ; ALL-LABEL {{^}}test_workgroup_id_y: -; HSA: compute_pgm_rsrc2_user_sgpr = 6 -; HSA: compute_pgm_rsrc2_tgid_x_en = 1 -; HSA: compute_pgm_rsrc2_tgid_y_en = 1 -; HSA: compute_pgm_rsrc2_tgid_z_en = 0 -; HSA: compute_pgm_rsrc2_tg_size_en = 0 +; HSA: user_sgpr_count = 6 +; HSA: enable_sgpr_workgroup_id_x = 1 +; HSA: enable_sgpr_workgroup_id_y = 1 +; HSA: enable_sgpr_workgroup_id_z = 0 +; HSA: enable_sgpr_workgroup_info = 0 ; HSA: enable_sgpr_grid_workgroup_count_x = 0 ; HSA: enable_sgpr_grid_workgroup_count_y = 0 ; HSA: enable_sgpr_grid_workgroup_count_z = 0 @@ -68,12 +68,12 @@ define void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 { } ; ALL-LABEL {{^}}test_workgroup_id_z: -; HSA: compute_pgm_rsrc2_user_sgpr = 6 -; HSA: compute_pgm_rsrc2_tgid_x_en = 1 -; HSA: compute_pgm_rsrc2_tgid_y_en = 0 -; HSA: compute_pgm_rsrc2_tgid_z_en = 1 -; HSA: compute_pgm_rsrc2_tg_size_en = 0 -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; HSA: user_sgpr_count = 6 +; HSA: enable_sgpr_workgroup_id_x = 1 +; HSA: enable_sgpr_workgroup_id_y = 0 +; HSA: enable_sgpr_workgroup_id_z = 1 +; HSA: enable_sgpr_workgroup_info = 0 +; HSA: enable_vgpr_workitem_id = 0 ; HSA: enable_sgpr_private_segment_buffer = 1 ; HSA: enable_sgpr_dispatch_ptr = 0 ; HSA: enable_sgpr_queue_ptr = 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll index 28ef7b8..393a593 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll @@ -12,7 +12,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0 ; MESA-NEXT: .long 132{{$}} ; ALL-LABEL {{^}}test_workitem_id_x: -; HSA: 
compute_pgm_rsrc2_tidig_comp_cnt = 0 +; HSA: enable_vgpr_workitem_id = 0 ; ALL-NOT: v0 ; ALL: {{buffer|flat}}_store_dword {{.*}}v0 @@ -27,7 +27,7 @@ define void @test_workitem_id_x(i32 addrspace(1)* %out) #1 { ; MESA-NEXT: .long 2180{{$}} ; ALL-LABEL {{^}}test_workitem_id_y: -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1 +; HSA: enable_vgpr_workitem_id = 1 ; ALL-NOT: v1 ; ALL: {{buffer|flat}}_store_dword {{.*}}v1 @@ -42,7 +42,7 @@ define void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { ; MESA-NEXT: .long 4228{{$}} ; ALL-LABEL {{^}}test_workitem_id_z: -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2 +; HSA: enable_vgpr_workitem_id = 2 ; ALL-NOT: v2 ; ALL: {{buffer|flat}}_store_dword {{.*}}v2 diff --git a/llvm/test/MC/AMDGPU/hsa-exp.s b/llvm/test/MC/AMDGPU/hsa-exp.s index 0a8d0d29..488afc5 100644 --- a/llvm/test/MC/AMDGPU/hsa-exp.s +++ b/llvm/test/MC/AMDGPU/hsa-exp.s @@ -45,12 +45,12 @@ amd_kernel_code_t_minimal: .amd_kernel_code_t - kernel_code_version_major = .option.machine_version_major + amd_code_version_major = .option.machine_version_major enable_sgpr_kernarg_segment_ptr = 1 is_ptr64 = my_is_ptr64 - compute_pgm_rsrc1_vgprs = 1 - compute_pgm_rsrc1_sgprs = 1+(my_sgpr-1)/8 - compute_pgm_rsrc2_user_sgpr = 2 + granulated_workitem_vgpr_count = 1 + granulated_wavefront_sgpr_count = 1+(my_sgpr-1)/8 + user_sgpr_count = 2 kernarg_segment_byte_size = my_kernarg_segment_byte_size wavefront_sgpr_count = my_sgpr // wavefront_sgpr_count = 7 @@ -58,39 +58,39 @@ amd_kernel_code_t_minimal: // Make sure a blank line won't break anything: // Make sure a line with whitespace won't break anything: - + workitem_vgpr_count = 16 .end_amd_kernel_code_t // ASM-LABEL: {{^}}amd_kernel_code_t_minimal: // ASM: .amd_kernel_code_t -// ASM: kernel_code_version_major = 7 -// ASM: kernel_code_version_minor = 0 -// ASM: machine_kind = 1 -// ASM: machine_version_major = 7 -// ASM: machine_version_minor = 0 -// ASM: machine_version_stepping = 0 +// ASM: amd_code_version_major = 7 +// ASM: 
amd_code_version_minor = 0 +// ASM: amd_machine_kind = 1 +// ASM: amd_machine_version_major = 7 +// ASM: amd_machine_version_minor = 0 +// ASM: amd_machine_version_stepping = 0 // ASM: kernel_code_entry_byte_offset = 256 // ASM: kernel_code_prefetch_byte_size = 0 // ASM: max_scratch_backing_memory_byte_size = 0 -// ASM: compute_pgm_rsrc1_vgprs = 1 -// ASM: compute_pgm_rsrc1_sgprs = 1 -// ASM: compute_pgm_rsrc1_priority = 0 -// ASM: compute_pgm_rsrc1_float_mode = 0 -// ASM: compute_pgm_rsrc1_priv = 0 -// ASM: compute_pgm_rsrc1_dx10_clamp = 0 -// ASM: compute_pgm_rsrc1_debug_mode = 0 -// ASM: compute_pgm_rsrc1_ieee_mode = 0 -// ASM: compute_pgm_rsrc2_scratch_en = 0 -// ASM: compute_pgm_rsrc2_user_sgpr = 2 -// ASM: compute_pgm_rsrc2_tgid_x_en = 0 -// ASM: compute_pgm_rsrc2_tgid_y_en = 0 -// ASM: compute_pgm_rsrc2_tgid_z_en = 0 -// ASM: compute_pgm_rsrc2_tg_size_en = 0 -// ASM: compute_pgm_rsrc2_tidig_comp_cnt = 0 -// ASM: compute_pgm_rsrc2_excp_en_msb = 0 -// ASM: compute_pgm_rsrc2_lds_size = 0 -// ASM: compute_pgm_rsrc2_excp_en = 0 +// ASM: granulated_workitem_vgpr_count = 1 +// ASM: granulated_wavefront_sgpr_count = 1 +// ASM: priority = 0 +// ASM: float_mode = 0 +// ASM: priv = 0 +// ASM: enable_dx10_clamp = 0 +// ASM: debug_mode = 0 +// ASM: enable_ieee_mode = 0 +// ASM: enable_sgpr_private_segment_wave_byte_offset = 0 +// ASM: user_sgpr_count = 2 +// ASM: enable_sgpr_workgroup_id_x = 0 +// ASM: enable_sgpr_workgroup_id_y = 0 +// ASM: enable_sgpr_workgroup_id_z = 0 +// ASM: enable_sgpr_workgroup_info = 0 +// ASM: enable_vgpr_workitem_id = 0 +// ASM: enable_exception_msb = 0 +// ASM: granulated_lds_size = 0 +// ASM: enable_exception = 0 // ASM: enable_sgpr_private_segment_buffer = 0 // ASM: enable_sgpr_dispatch_ptr = 0 // ASM: enable_sgpr_queue_ptr = 0 diff --git a/llvm/test/MC/AMDGPU/hsa.s b/llvm/test/MC/AMDGPU/hsa.s index 27de3d5..b95a790 100644 --- a/llvm/test/MC/AMDGPU/hsa.s +++ b/llvm/test/MC/AMDGPU/hsa.s @@ -42,99 +42,99 @@ amd_kernel_code_t_test_all: ; Test 
all amd_kernel_code_t members with non-default values. .amd_kernel_code_t - kernel_code_version_major = 100 - kernel_code_version_minor = 100 - machine_kind = 0 - machine_version_major = 5 - machine_version_minor = 5 - machine_version_stepping = 5 - kernel_code_entry_byte_offset = 512 - kernel_code_prefetch_byte_size = 1 - max_scratch_backing_memory_byte_size = 1 - compute_pgm_rsrc1_vgprs = 1 - compute_pgm_rsrc1_sgprs = 1 - compute_pgm_rsrc1_priority = 1 - compute_pgm_rsrc1_float_mode = 1 - compute_pgm_rsrc1_priv = 1 - compute_pgm_rsrc1_dx10_clamp = 1 - compute_pgm_rsrc1_debug_mode = 1 - compute_pgm_rsrc1_ieee_mode = 1 - compute_pgm_rsrc2_scratch_en = 1 - compute_pgm_rsrc2_user_sgpr = 1 - compute_pgm_rsrc2_tgid_x_en = 1 - compute_pgm_rsrc2_tgid_y_en = 1 - compute_pgm_rsrc2_tgid_z_en = 1 - compute_pgm_rsrc2_tg_size_en = 1 - compute_pgm_rsrc2_tidig_comp_cnt = 1 - compute_pgm_rsrc2_excp_en_msb = 1 - compute_pgm_rsrc2_lds_size = 1 - compute_pgm_rsrc2_excp_en = 1 - enable_sgpr_private_segment_buffer = 1 - enable_sgpr_dispatch_ptr = 1 - enable_sgpr_queue_ptr = 1 - enable_sgpr_kernarg_segment_ptr = 1 - enable_sgpr_dispatch_id = 1 - enable_sgpr_flat_scratch_init = 1 - enable_sgpr_private_segment_size = 1 - enable_sgpr_grid_workgroup_count_x = 1 - enable_sgpr_grid_workgroup_count_y = 1 - enable_sgpr_grid_workgroup_count_z = 1 - enable_ordered_append_gds = 1 - private_element_size = 1 - is_ptr64 = 1 - is_dynamic_callstack = 1 - is_debug_enabled = 1 - is_xnack_enabled = 1 - workitem_private_segment_byte_size = 1 - workgroup_group_segment_byte_size = 1 - gds_segment_byte_size = 1 - kernarg_segment_byte_size = 1 - workgroup_fbarrier_count = 1 - wavefront_sgpr_count = 1 - workitem_vgpr_count = 1 - reserved_vgpr_first = 1 - reserved_vgpr_count = 1 - reserved_sgpr_first = 1 - reserved_sgpr_count = 1 - debug_wavefront_private_segment_offset_sgpr = 1 - debug_private_segment_buffer_sgpr = 1 - kernarg_segment_alignment = 5 - group_segment_alignment = 5 - private_segment_alignment = 5 
- wavefront_size = 5 - call_convention = 1 - runtime_loader_kernel_symbol = 1 + kernel_code_version_major = 100 + kernel_code_version_minor = 100 + machine_kind = 0 + machine_version_major = 5 + machine_version_minor = 5 + machine_version_stepping = 5 + kernel_code_entry_byte_offset = 512 + kernel_code_prefetch_byte_size = 1 + max_scratch_backing_memory_byte_size = 1 + compute_pgm_rsrc1_vgprs = 1 + compute_pgm_rsrc1_sgprs = 1 + compute_pgm_rsrc1_priority = 1 + compute_pgm_rsrc1_float_mode = 1 + compute_pgm_rsrc1_priv = 1 + compute_pgm_rsrc1_dx10_clamp = 1 + compute_pgm_rsrc1_debug_mode = 1 + compute_pgm_rsrc1_ieee_mode = 1 + compute_pgm_rsrc2_scratch_en = 1 + compute_pgm_rsrc2_user_sgpr = 1 + compute_pgm_rsrc2_tgid_x_en = 1 + compute_pgm_rsrc2_tgid_y_en = 1 + compute_pgm_rsrc2_tgid_z_en = 1 + compute_pgm_rsrc2_tg_size_en = 1 + compute_pgm_rsrc2_tidig_comp_cnt = 1 + compute_pgm_rsrc2_excp_en_msb = 1 + compute_pgm_rsrc2_lds_size = 1 + compute_pgm_rsrc2_excp_en = 1 + enable_sgpr_private_segment_buffer = 1 + enable_sgpr_dispatch_ptr = 1 + enable_sgpr_queue_ptr = 1 + enable_sgpr_kernarg_segment_ptr = 1 + enable_sgpr_dispatch_id = 1 + enable_sgpr_flat_scratch_init = 1 + enable_sgpr_private_segment_size = 1 + enable_sgpr_grid_workgroup_count_x = 1 + enable_sgpr_grid_workgroup_count_y = 1 + enable_sgpr_grid_workgroup_count_z = 1 + enable_ordered_append_gds = 1 + private_element_size = 1 + is_ptr64 = 1 + is_dynamic_callstack = 1 + is_debug_enabled = 1 + is_xnack_enabled = 1 + workitem_private_segment_byte_size = 1 + workgroup_group_segment_byte_size = 1 + gds_segment_byte_size = 1 + kernarg_segment_byte_size = 1 + workgroup_fbarrier_count = 1 + wavefront_sgpr_count = 1 + workitem_vgpr_count = 1 + reserved_vgpr_first = 1 + reserved_vgpr_count = 1 + reserved_sgpr_first = 1 + reserved_sgpr_count = 1 + debug_wavefront_private_segment_offset_sgpr = 1 + debug_private_segment_buffer_sgpr = 1 + kernarg_segment_alignment = 5 + group_segment_alignment = 5 + private_segment_alignment 
= 5 + wavefront_size = 5 + call_convention = 1 + runtime_loader_kernel_symbol = 1 .end_amd_kernel_code_t // ASM-LABEL: {{^}}amd_kernel_code_t_test_all: // ASM: .amd_kernel_code_t -// ASM: kernel_code_version_major = 100 -// ASM: kernel_code_version_minor = 100 -// ASM: machine_kind = 0 -// ASM: machine_version_major = 5 -// ASM: machine_version_minor = 5 -// ASM: machine_version_stepping = 5 +// ASM: amd_code_version_major = 100 +// ASM: amd_code_version_minor = 100 +// ASM: amd_machine_kind = 0 +// ASM: amd_machine_version_major = 5 +// ASM: amd_machine_version_minor = 5 +// ASM: amd_machine_version_stepping = 5 // ASM: kernel_code_entry_byte_offset = 512 // ASM: kernel_code_prefetch_byte_size = 1 // ASM: max_scratch_backing_memory_byte_size = 1 -// ASM: compute_pgm_rsrc1_vgprs = 1 -// ASM: compute_pgm_rsrc1_sgprs = 1 -// ASM: compute_pgm_rsrc1_priority = 1 -// ASM: compute_pgm_rsrc1_float_mode = 1 -// ASM: compute_pgm_rsrc1_priv = 1 -// ASM: compute_pgm_rsrc1_dx10_clamp = 1 -// ASM: compute_pgm_rsrc1_debug_mode = 1 -// ASM: compute_pgm_rsrc1_ieee_mode = 1 -// ASM: compute_pgm_rsrc2_scratch_en = 1 -// ASM: compute_pgm_rsrc2_user_sgpr = 1 -// ASM: compute_pgm_rsrc2_tgid_x_en = 1 -// ASM: compute_pgm_rsrc2_tgid_y_en = 1 -// ASM: compute_pgm_rsrc2_tgid_z_en = 1 -// ASM: compute_pgm_rsrc2_tg_size_en = 1 -// ASM: compute_pgm_rsrc2_tidig_comp_cnt = 1 -// ASM: compute_pgm_rsrc2_excp_en_msb = 1 -// ASM: compute_pgm_rsrc2_lds_size = 1 -// ASM: compute_pgm_rsrc2_excp_en = 1 +// ASM: granulated_workitem_vgpr_count = 1 +// ASM: granulated_wavefront_sgpr_count = 1 +// ASM: priority = 1 +// ASM: float_mode = 1 +// ASM: priv = 1 +// ASM: enable_dx10_clamp = 1 +// ASM: debug_mode = 1 +// ASM: enable_ieee_mode = 1 +// ASM: enable_sgpr_private_segment_wave_byte_offset = 1 +// ASM: user_sgpr_count = 1 +// ASM: enable_sgpr_workgroup_id_x = 1 +// ASM: enable_sgpr_workgroup_id_y = 1 +// ASM: enable_sgpr_workgroup_id_z = 1 +// ASM: enable_sgpr_workgroup_info = 1 +// ASM: 
enable_vgpr_workitem_id = 1 +// ASM: enable_exception_msb = 1 +// ASM: granulated_lds_size = 1 +// ASM: enable_exception = 1 // ASM: enable_sgpr_private_segment_buffer = 1 // ASM: enable_sgpr_dispatch_ptr = 1 // ASM: enable_sgpr_queue_ptr = 1 @@ -176,9 +176,9 @@ amd_kernel_code_t_minimal: .amd_kernel_code_t enable_sgpr_kernarg_segment_ptr = 1 is_ptr64 = 1 - compute_pgm_rsrc1_vgprs = 1 - compute_pgm_rsrc1_sgprs = 1 - compute_pgm_rsrc2_user_sgpr = 2 + granulated_workitem_vgpr_count = 1 + granulated_wavefront_sgpr_count = 1 + user_sgpr_count = 2 kernarg_segment_byte_size = 16 wavefront_sgpr_count = 8 // wavefront_sgpr_count = 7 @@ -186,39 +186,39 @@ amd_kernel_code_t_minimal: // Make sure a blank line won't break anything: // Make sure a line with whitespace won't break anything: - + workitem_vgpr_count = 16 .end_amd_kernel_code_t // ASM-LABEL: {{^}}amd_kernel_code_t_minimal: // ASM: .amd_kernel_code_t -// ASM: kernel_code_version_major = 1 -// ASM: kernel_code_version_minor = 0 -// ASM: machine_kind = 1 -// ASM: machine_version_major = 7 -// ASM: machine_version_minor = 0 -// ASM: machine_version_stepping = 0 +// ASM: amd_code_version_major = 1 +// ASM: amd_code_version_minor = 0 +// ASM: amd_machine_kind = 1 +// ASM: amd_machine_version_major = 7 +// ASM: amd_machine_version_minor = 0 +// ASM: amd_machine_version_stepping = 0 // ASM: kernel_code_entry_byte_offset = 256 // ASM: kernel_code_prefetch_byte_size = 0 // ASM: max_scratch_backing_memory_byte_size = 0 -// ASM: compute_pgm_rsrc1_vgprs = 1 -// ASM: compute_pgm_rsrc1_sgprs = 1 -// ASM: compute_pgm_rsrc1_priority = 0 -// ASM: compute_pgm_rsrc1_float_mode = 0 -// ASM: compute_pgm_rsrc1_priv = 0 -// ASM: compute_pgm_rsrc1_dx10_clamp = 0 -// ASM: compute_pgm_rsrc1_debug_mode = 0 -// ASM: compute_pgm_rsrc1_ieee_mode = 0 -// ASM: compute_pgm_rsrc2_scratch_en = 0 -// ASM: compute_pgm_rsrc2_user_sgpr = 2 -// ASM: compute_pgm_rsrc2_tgid_x_en = 0 -// ASM: compute_pgm_rsrc2_tgid_y_en = 0 -// ASM: 
compute_pgm_rsrc2_tgid_z_en = 0 -// ASM: compute_pgm_rsrc2_tg_size_en = 0 -// ASM: compute_pgm_rsrc2_tidig_comp_cnt = 0 -// ASM: compute_pgm_rsrc2_excp_en_msb = 0 -// ASM: compute_pgm_rsrc2_lds_size = 0 -// ASM: compute_pgm_rsrc2_excp_en = 0 +// ASM: granulated_workitem_vgpr_count = 1 +// ASM: granulated_wavefront_sgpr_count = 1 +// ASM: priority = 0 +// ASM: float_mode = 0 +// ASM: priv = 0 +// ASM: enable_dx10_clamp = 0 +// ASM: debug_mode = 0 +// ASM: enable_ieee_mode = 0 +// ASM: enable_sgpr_private_segment_wave_byte_offset = 0 +// ASM: user_sgpr_count = 2 +// ASM: enable_sgpr_workgroup_id_x = 0 +// ASM: enable_sgpr_workgroup_id_y = 0 +// ASM: enable_sgpr_workgroup_id_z = 0 +// ASM: enable_sgpr_workgroup_info = 0 +// ASM: enable_vgpr_workitem_id = 0 +// ASM: enable_exception_msb = 0 +// ASM: granulated_lds_size = 0 +// ASM: enable_exception = 0 // ASM: enable_sgpr_private_segment_buffer = 0 // ASM: enable_sgpr_dispatch_ptr = 0 // ASM: enable_sgpr_queue_ptr = 0