- Add target id support (https://clang.llvm.org/docs/ClangOffloadBundler.html#target-id)
- Add code object v4 support (https://llvm.org/docs/AMDGPUUsage.html#elf-code-object)
- Add kernarg_size to kernel descriptor
- Change trap handler ABI to no longer move queue pointer into s[0:1]
- Clean up ELF definitions
- Add V2, V3, V4 suffixes to names to clearly distinguish code object versions
- Consolidate note names
Differential Revision: https://reviews.llvm.org/D95638
# REQUIRES: amdgpu
-# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj %s -o %t.o
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj %s -o %t.o
# RUN: ld.lld -shared %t.o -o %t.so
# RUN: llvm-readobj --file-headers %t.so | FileCheck %s
; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D
; AMDHSA: OS/ABI: AMDGPU_HSA (0x40)
-; AMDHSA: ABIVersion: 1
+; AMDHSA: ABIVersion: 2
; AMDPAL: OS/ABI: AMDGPU_PAL (0x41)
; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)
// was never defined for V1.
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
+ ELFABIVERSION_AMDGPU_HSA_V4 = 2
};
#define ELF_RELOC(name, value) name = value,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
- EF_AMDGPU_XNACK = 0x100,
- // Indicates if the "sram-ecc" target feature is enabled for all code
+ //
+ // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
+ EF_AMDGPU_FEATURE_XNACK_V2 = 0x01,
+ // Indicates if the trap handler is enabled for all code contained
+ // in the object.
+ //
+ // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
+ EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02,
+
+ // Indicates if the "xnack" target feature is enabled for all code contained
+ // in the object.
+ //
+ // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
+ EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
+ // Indicates if the "sramecc" target feature is enabled for all code
// contained in the object.
- EF_AMDGPU_SRAM_ECC = 0x200,
+ //
+ // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
+ EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
+
+ // XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
+ //
+ // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
+ EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
+ // XNACK is not supported.
+ EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
+ // XNACK is any/default/unspecified.
+ EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
+ // XNACK is off.
+ EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
+ // XNACK is on.
+ EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
+
+ // SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
+ //
+ // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
+ EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
+ // SRAMECC is not supported.
+ EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
+ // SRAMECC is any/default/unspecified.
+ EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
+ // SRAMECC is off.
+ EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
+ // SRAMECC is on.
+ EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
};
// ELF Relocation types for AMDGPU
SHN_AMDGPU_LDS = 0xff00, // Variable in LDS; symbol encoded like SHN_COMMON
};
-// AMD specific notes. (Code Object V2)
+// AMD vendor specific notes. (Code Object V2)
enum {
- // Note types with values between 0 and 9 (inclusive) are reserved.
- NT_AMD_AMDGPU_HSA_METADATA = 10,
- NT_AMD_AMDGPU_ISA = 11,
- NT_AMD_AMDGPU_PAL_METADATA = 12
+ NT_AMD_HSA_CODE_OBJECT_VERSION = 1,
+ NT_AMD_HSA_HSAIL = 2,
+ NT_AMD_HSA_ISA_VERSION = 3,
+ // Note types with values between 4 and 9 (inclusive) are reserved.
+ NT_AMD_HSA_METADATA = 10,
+ NT_AMD_HSA_ISA_NAME = 11,
+ NT_AMD_PAL_METADATA = 12
};
-// AMDGPU specific notes. (Code Object V3)
+// AMDGPU vendor specific notes. (Code Object V3)
enum {
// Note types with values between 0 and 31 (inclusive) are reserved.
NT_AMDGPU_METADATA = 32
return nullptr;
}
+ // For any initialization at the beginning of parsing.
+ virtual void onBeginOfFile() {}
+
// For any checks or cleanups at the end of parsing.
virtual void onEndOfFile() {}
};
const unsigned *OperandCycles; // Itinerary operand cycles
const unsigned *ForwardingPaths;
FeatureBitset FeatureBits; // Feature bits for current CPU + FS
+ std::string FeatureString; // Feature string
public:
MCSubtargetInfo(const MCSubtargetInfo &) = default;
FeatureBits = FeatureBits_;
}
+ StringRef getFeatureString() const { return FeatureString; }
+
bool hasFeature(unsigned Feature) const {
return FeatureBits[Feature];
}
//===----------------------------------------------------------------------===//
namespace HSAMD {
-/// HSA metadata major version.
-constexpr uint32_t VersionMajor = 1;
-/// HSA metadata minor version.
-constexpr uint32_t VersionMinor = 0;
+/// HSA metadata major version for code object V2.
+constexpr uint32_t VersionMajorV2 = 1;
+/// HSA metadata minor version for code object V2.
+constexpr uint32_t VersionMinorV2 = 0;
+
+/// HSA metadata major version for code object V3.
+constexpr uint32_t VersionMajorV3 = 1;
+/// HSA metadata minor version for code object V3.
+constexpr uint32_t VersionMinorV3 = 0;
+
+/// HSA metadata major version for code object V4.
+constexpr uint32_t VersionMajorV4 = 1;
+/// HSA metadata minor version for code object V4.
+constexpr uint32_t VersionMinorV4 = 1;
/// HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";
struct kernel_descriptor_t {
uint32_t group_segment_fixed_size;
uint32_t private_segment_fixed_size;
- uint8_t reserved0[8];
+ uint32_t kernarg_size;
+ uint8_t reserved0[4];
int64_t kernel_code_entry_byte_offset;
uint8_t reserved1[20];
uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
enum : uint32_t {
GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
- RESERVED0_OFFSET = 8,
+ KERNARG_SIZE_OFFSET = 8,
+ RESERVED0_OFFSET = 12,
KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
RESERVED1_OFFSET = 24,
COMPUTE_PGM_RSRC3_OFFSET = 44,
static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
"invalid offset for private_segment_fixed_size");
+static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
+ KERNARG_SIZE_OFFSET,
+ "invalid offset for kernarg_size");
static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
"invalid offset for reserved0");
static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
(void)InsertResult;
}
+ getTargetParser().onBeginOfFile();
+
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof)) {
ParseStatementInfo Info(&AsmStrRewrites);
(void)InsertResult;
}
+ getTargetParser().onBeginOfFile();
+
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof) ||
SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef TuneCPU,
StringRef FS) {
FeatureBits = getFeatures(CPU, TuneCPU, FS, ProcDesc, ProcFeatures);
+ FeatureString = std::string(FS);
+
if (!TuneCPU.empty())
CPUSchedModel = &getSchedModelForCPU(TuneCPU);
else
void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef TuneCPU,
StringRef FS) {
FeatureBits = getFeatures(CPU, TuneCPU, FS, ProcDesc, ProcFeatures);
+ FeatureString = std::string(FS);
}
MCSubtargetInfo::MCSubtargetInfo(const Triple &TT, StringRef C, StringRef TC,
ECase(NT_FREEBSD_PROCSTAT_PSSTRINGS);
ECase(NT_FREEBSD_PROCSTAT_AUXV);
// AMD specific notes. (Code Object V2)
- ECase(NT_AMD_AMDGPU_HSA_METADATA);
- ECase(NT_AMD_AMDGPU_ISA);
- ECase(NT_AMD_AMDGPU_PAL_METADATA);
+ ECase(NT_AMD_HSA_METADATA);
+ ECase(NT_AMD_HSA_ISA_NAME);
+ ECase(NT_AMD_PAL_METADATA);
// AMDGPU specific notes. (Code Object V3)
ECase(NT_AMDGPU_METADATA);
#undef ECase
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1031, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1032, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1033, EF_AMDGPU_MACH);
- BCase(EF_AMDGPU_XNACK);
- BCase(EF_AMDGPU_SRAM_ECC);
+ switch (Object->Header.ABIVersion) {
+ default:
+ // ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
+ LLVM_FALLTHROUGH;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
+ BCase(EF_AMDGPU_FEATURE_XNACK_V3);
+ BCase(EF_AMDGPU_FEATURE_SRAMECC_V3);
+ break;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ BCaseMask(EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4,
+ EF_AMDGPU_FEATURE_XNACK_V4);
+ BCaseMask(EF_AMDGPU_FEATURE_XNACK_ANY_V4,
+ EF_AMDGPU_FEATURE_XNACK_V4);
+ BCaseMask(EF_AMDGPU_FEATURE_XNACK_OFF_V4,
+ EF_AMDGPU_FEATURE_XNACK_V4);
+ BCaseMask(EF_AMDGPU_FEATURE_XNACK_ON_V4,
+ EF_AMDGPU_FEATURE_XNACK_V4);
+ BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4,
+ EF_AMDGPU_FEATURE_SRAMECC_V4);
+ BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_ANY_V4,
+ EF_AMDGPU_FEATURE_SRAMECC_V4);
+ BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_OFF_V4,
+ EF_AMDGPU_FEATURE_SRAMECC_V4);
+ BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_ON_V4,
+ EF_AMDGPU_FEATURE_SRAMECC_V4);
+ break;
+ }
break;
default:
break;
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)) {
+ : AsmPrinter(TM, std::move(Streamer)) {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
if (isHsaAbiVersion2(getGlobalSTI())) {
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2());
- } else {
+ } else if (isHsaAbiVersion3(getGlobalSTI())) {
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3());
+ } else {
+ HSAMetadataStream.reset(new HSAMD::MetadataStreamerV4());
}
}
}
}
void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
- if (isHsaAbiVersion3(getGlobalSTI())) {
- std::string ExpectedTarget;
- raw_string_ostream ExpectedTargetOS(ExpectedTarget);
- IsaInfo::streamIsaVersion(getGlobalSTI(), ExpectedTargetOS);
-
- getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
- }
+ // TODO: Which one is called first, emitStartOfAsmFile or
+ // emitFunctionBodyStart?
+ if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
+ initializeTargetID(M);
if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
TM.getTargetTriple().getOS() != Triple::AMDPAL)
return;
+ if (isHsaAbiVersion3Or4(getGlobalSTI()))
+ getTargetStreamer()->EmitDirectiveAMDGCNTarget();
+
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
- HSAMetadataStream->begin(M);
+ HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
getTargetStreamer()->getPALMetadata()->readFromIR(M);
- if (isHsaAbiVersion3(getGlobalSTI()))
+ if (isHsaAbiVersion3Or4(getGlobalSTI()))
return;
- // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
+ // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
- // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
+ // HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2.
IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU());
- getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
+ getTargetStreamer()->EmitDirectiveHSACodeObjectISAV2(
Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
}
return;
if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
- isHsaAbiVersion2(getGlobalSTI())) {
- // Emit ISA Version (NT_AMD_AMDGPU_ISA).
- std::string ISAVersionString;
- raw_string_ostream ISAVersionStream(ISAVersionString);
- IsaInfo::streamIsaVersion(getGlobalSTI(), ISAVersionStream);
- getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
- }
+ isHsaAbiVersion2(getGlobalSTI()))
+ getTargetStreamer()->EmitISAVersion();
- // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
+ // Emit HSA Metadata (NT_AMD_HSA_METADATA).
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
HSAMetadataStream->end();
bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
+ const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
+ const Function &F = MF->getFunction();
+
+ // TODO: Which one is called first, emitStartOfAsmFile or
+ // emitFunctionBodyStart?
+ if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
+ initializeTargetID(*F.getParent());
+
+ const auto &FunctionTargetID = STM.getTargetID();
+ // Make sure function's xnack settings are compatible with module's
+ // xnack settings.
+ if (FunctionTargetID.isXnackSupported() &&
+ FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
+ FunctionTargetID.getXnackSetting() != getTargetStreamer()->getTargetID()->getXnackSetting()) {
+ OutContext.reportError({}, "xnack setting of '" + Twine(MF->getName()) +
+ "' function does not match module xnack setting");
+ return;
+ }
+ // Make sure function's sramecc settings are compatible with module's
+ // sramecc settings.
+ if (FunctionTargetID.isSramEccSupported() &&
+ FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
+ FunctionTargetID.getSramEccSetting() != getTargetStreamer()->getTargetID()->getSramEccSetting()) {
+ OutContext.reportError({}, "sramecc setting of '" + Twine(MF->getName()) +
+ "' function does not match module sramecc setting");
+ return;
+ }
+
if (!MFI.isEntryFunction())
return;
- const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
- const Function &F = MF->getFunction();
if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
if (ReadOnlySection.getAlignment() < 64)
ReadOnlySection.setAlignment(Align(64));
- const MCSubtargetInfo &STI = MF->getSubtarget();
+ const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
SmallString<128> KernelName;
getNameWithPrefix(KernelName, &MF->getFunction());
getTargetStreamer()->EmitAmdhsaKernelDescriptor(
- STI, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
+ STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
CurrentProgramInfo.NumVGPRsForWavesPerEU,
CurrentProgramInfo.NumSGPRsForWavesPerEU -
- IsaInfo::getNumExtraSGPRs(&STI,
+ IsaInfo::getNumExtraSGPRs(&STM,
CurrentProgramInfo.VCCUsed,
CurrentProgramInfo.FlatUsed),
- CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
- hasXNACK(STI));
+ CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
Streamer.PopSection();
}
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
- isHsaAbiVersion3(getGlobalSTI())) {
+ isHsaAbiVersion3Or4(getGlobalSTI())) {
AsmPrinter::emitFunctionEntryLabel();
return;
}
const MachineFunction &MF,
const SIProgramInfo &PI) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+ const Function &F = MF.getFunction();
+
amdhsa::kernel_descriptor_t KernelDescriptor;
memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
+
+ Align MaxKernArgAlign;
+ KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+
KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1();
KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
return false;
}
+// TODO: Fold this into emitFunctionBodyStart.
+void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
+ // In the beginning all features are either 'Any' or 'NotSupported',
+ // depending on global target features. This will cover empty modules.
+ getTargetStreamer()->initializeTargetID(
+ *getGlobalSTI(), getGlobalSTI()->getFeatureString());
+
+ // If module is empty, we are done.
+ if (M.empty())
+ return;
+
+ // If module is not empty, need to find first 'Off' or 'On' feature
+ // setting per feature from functions in module.
+ for (auto &F : M) {
+ auto &TSTargetID = getTargetStreamer()->getTargetID();
+ if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
+ (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
+ break;
+
+ const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
+ const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID();
+ if (TSTargetID->isXnackSupported())
+ if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
+ TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());
+ if (TSTargetID->isSramEccSupported())
+ if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
+ TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
+ }
+}
+
uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = STM.getInstrInfo();
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
const GCNSubtarget &ST) const {
- return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
- UsesVCC, UsesFlatScratch);
+ return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(
+ &ST, UsesVCC, UsesFlatScratch, ST.getTargetID().isXnackOnOrAny());
}
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs(
int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
};
+ void initializeTargetID(const Module &M);
+
SIProgramInfo CurrentProgramInfo;
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
void MetadataStreamerV2::emitVersion() {
auto &Version = HSAMetadata.mVersion;
- Version.push_back(VersionMajor);
- Version.push_back(VersionMinor);
+ Version.push_back(VersionMajorV2);
+ Version.push_back(VersionMinorV2);
}
void MetadataStreamerV2::emitPrintf(const Module &Mod) {
return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
}
-void MetadataStreamerV2::begin(const Module &Mod) {
+void MetadataStreamerV2::begin(const Module &Mod,
+ const IsaInfo::AMDGPUTargetID &TargetID) {
emitVersion();
emitPrintf(Mod);
}
void MetadataStreamerV3::emitVersion() {
auto Version = HSAMetadataDoc->getArrayNode();
- Version.push_back(Version.getDocument()->getNode(VersionMajor));
- Version.push_back(Version.getDocument()->getNode(VersionMinor));
+ Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
+ Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
getRootMetadata("amdhsa.version") = Version;
}
return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
}
-void MetadataStreamerV3::begin(const Module &Mod) {
+void MetadataStreamerV3::begin(const Module &Mod,
+ const IsaInfo::AMDGPUTargetID &TargetID) {
emitVersion();
emitPrintf(Mod);
getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
Kernels.push_back(Kern);
}
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV4
+//===----------------------------------------------------------------------===//
+
+void MetadataStreamerV4::emitVersion() {
+ auto Version = HSAMetadataDoc->getArrayNode();
+ Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
+ Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
+ getRootMetadata("amdhsa.version") = Version;
+}
+
+void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID) {
+ getRootMetadata("amdhsa.target") =
+ HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
+}
+
+void MetadataStreamerV4::begin(const Module &Mod,
+ const IsaInfo::AMDGPUTargetID &TargetID) {
+ emitVersion();
+ emitTargetID(TargetID);
+ emitPrintf(Mod);
+ getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
+}
+
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/Alignment.h"
virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0;
- virtual void begin(const Module &Mod) = 0;
+ virtual void begin(const Module &Mod,
+ const IsaInfo::AMDGPUTargetID &TargetID) = 0;
virtual void end() = 0;
const SIProgramInfo &ProgramInfo) = 0;
};
-class MetadataStreamerV3 final : public MetadataStreamer {
-private:
+// TODO: Rename MetadataStreamerV3 -> MetadataStreamerMsgPackV3.
+class MetadataStreamerV3 : public MetadataStreamer {
+protected:
std::unique_ptr<msgpack::Document> HSAMetadataDoc =
std::make_unique<msgpack::Document>();
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
- void begin(const Module &Mod) override;
+ void begin(const Module &Mod,
+ const IsaInfo::AMDGPUTargetID &TargetID) override;
void end() override;
const SIProgramInfo &ProgramInfo) override;
};
+// TODO: Rename MetadataStreamerV4 -> MetadataStreamerMsgPackV4.
+class MetadataStreamerV4 final : public MetadataStreamerV3 {
+ void emitVersion();
+
+ void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID);
+
+public:
+ MetadataStreamerV4() = default;
+ ~MetadataStreamerV4() = default;
+
+ void begin(const Module &Mod,
+ const IsaInfo::AMDGPUTargetID &TargetID) override;
+};
+
+// TODO: Rename MetadataStreamerV2 -> MetadataStreamerYamlV2.
class MetadataStreamerV2 final : public MetadataStreamer {
private:
Metadata HSAMetadata;
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
- void begin(const Module &Mod) override;
+ void begin(const Module &Mod,
+ const IsaInfo::AMDGPUTargetID &TargetID) override;
void end() override;
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
- // Is non-HSA path or trap-handler disabled? then, insert s_endpgm instruction
- if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
- !ST.isTrapHandlerEnabled()) {
- B.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
- } else {
- // Pass queue pointer to trap handler as input, and insert trap instruction
- // Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
- MachineRegisterInfo &MRI = *B.getMRI();
+ if (!ST.isTrapHandlerEnabled() ||
+ ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
+ return legalizeTrapEndpgm(MI, MRI, B);
+
+ if (Optional<uint8_t> HsaAbiVer = AMDGPU::getHsaAbiVersion(&ST)) {
+ switch (*HsaAbiVer) {
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
+ return legalizeTrapHsaQueuePtr(MI, MRI, B);
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ return ST.supportsGetDoorbellID() ?
+ legalizeTrapHsa(MI, MRI, B) :
+ legalizeTrapHsaQueuePtr(MI, MRI, B);
+ }
+ }
- Register LiveIn =
- MRI.createGenericVirtualRegister(LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
- if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
- return false;
+ llvm_unreachable("Unknown trap handler");
+}
- Register SGPR01(AMDGPU::SGPR0_SGPR1);
- B.buildCopy(SGPR01, LiveIn);
- B.buildInstr(AMDGPU::S_TRAP)
- .addImm(GCNSubtarget::TrapIDLLVMTrap)
- .addReg(SGPR01, RegState::Implicit);
- }
+bool AMDGPULegalizerInfo::legalizeTrapEndpgm(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
+ B.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
+ MI.eraseFromParent();
+ return true;
+}
+bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
+ // Pass queue pointer to trap handler as input, and insert trap instruction
+ // Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
+ Register LiveIn =
+ MRI.createGenericVirtualRegister(LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
+ if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
+ return false;
+
+ Register SGPR01(AMDGPU::SGPR0_SGPR1);
+ B.buildCopy(SGPR01, LiveIn);
+ B.buildInstr(AMDGPU::S_TRAP)
+ .addImm(static_cast<unsigned>(GCNSubtarget::TrapID::LLVMAMDHSATrap))
+ .addReg(SGPR01, RegState::Implicit);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeTrapHsa(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
+ B.buildInstr(AMDGPU::S_TRAP)
+ .addImm(static_cast<unsigned>(GCNSubtarget::TrapID::LLVMAMDHSATrap));
MI.eraseFromParent();
return true;
}
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
// Is non-HSA path or trap-handler disabled? then, report a warning
// accordingly
- if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
- !ST.isTrapHandlerEnabled()) {
+ if (!ST.isTrapHandlerEnabled() ||
+ ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
DiagnosticInfoUnsupported NoTrap(B.getMF().getFunction(),
"debugtrap handler not supported",
MI.getDebugLoc(), DS_Warning);
Ctx.diagnose(NoTrap);
} else {
// Insert debug-trap instruction
- B.buildInstr(AMDGPU::S_TRAP).addImm(GCNSubtarget::TrapIDLLVMDebugTrap);
+ B.buildInstr(AMDGPU::S_TRAP)
+ .addImm(static_cast<unsigned>(GCNSubtarget::TrapID::LLVMAMDHSADebugTrap));
}
MI.eraseFromParent();
bool legalizeTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeTrapEndpgm(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeTrapHsaQueuePtr(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeTrapHsa(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeDebugTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
const char NoteNameV2[] = "AMD";
const char NoteNameV3[] = "AMDGPU";
-// TODO: Remove this file once we drop code object v2.
-enum NoteType{
- NT_AMDGPU_HSA_RESERVED_0 = 0,
- NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
- NT_AMDGPU_HSA_HSAIL = 2,
- NT_AMDGPU_HSA_ISA = 3,
- NT_AMDGPU_HSA_PRODUCER = 4,
- NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
- NT_AMDGPU_HSA_EXTENSION = 6,
- NT_AMDGPU_HSA_RESERVED_7 = 7,
- NT_AMDGPU_HSA_RESERVED_8 = 8,
- NT_AMDGPU_HSA_RESERVED_9 = 9,
- NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
- NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
-};
-
} // End namespace ElfNote
} // End namespace AMDGPU
} // End namespace llvm
bool ParseDirectiveHSACodeObjectISA();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
- bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
+ // TODO: Possibly make subtargetHasRegister const.
+ bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
bool ParseDirectiveAMDGPUHsaKernel();
bool ParseDirectiveISAVersion();
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
- if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
- if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
initializeGprCountSymbol(IS_VGPR);
initializeGprCountSymbol(IS_SGPR);
} else
}
}
- bool hasXNACK() const {
- return AMDGPU::hasXNACK(getSTI());
- }
-
bool hasMIMG_R128() const {
return AMDGPU::hasMIMG_R128(getSTI());
}
void lex();
public:
+ void onBeginOfFile() override;
+
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
return nullptr;
}
- if (isHsaAbiVersion3(&getSTI())) {
+ if (isHsaAbiVersion3Or4(&getSTI())) {
if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
return nullptr;
} else
if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
return TokError("directive only supported for amdgcn architecture");
- std::string Target;
-
- SMLoc TargetStart = getLoc();
- if (getParser().parseEscapedString(Target))
+ std::string TargetIDDirective;
+ SMLoc TargetStart = getTok().getLoc();
+ if (getParser().parseEscapedString(TargetIDDirective))
return true;
- SMRange TargetRange = SMRange(TargetStart, getLoc());
-
- std::string ExpectedTarget;
- raw_string_ostream ExpectedTargetOS(ExpectedTarget);
- IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
- if (Target != ExpectedTargetOS.str())
- return Error(TargetRange.Start, "target must match options", TargetRange);
+ SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
+ if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
+ return getParser().Error(TargetRange.Start,
+ (Twine(".amdgcn_target directive's target id ") +
+ Twine(TargetIDDirective) +
+ Twine(" does not match the specified target id ") +
+ Twine(getTargetStreamer().getTargetID()->toString())).str());
- getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
return false;
}
unsigned UserSGPRCount = 0;
bool ReserveVCC = true;
bool ReserveFlatScr = true;
- bool ReserveXNACK = hasXNACK();
Optional<bool> EnableWavefrontSize32;
while (true) {
if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
KD.private_segment_fixed_size = Val;
+ } else if (ID == ".amdhsa_kernarg_size") {
+ if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
+ return OutOfRangeError(ValRange);
+ KD.kernarg_size = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
return Error(IDRange.Start, "directive requires gfx8+", IDRange);
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
- ReserveXNACK = Val;
+ if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
+ return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
+ IDRange);
} else if (ID == ".amdhsa_float_round_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
unsigned VGPRBlocks;
unsigned SGPRBlocks;
if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
- ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
+ getTargetStreamer().getTargetID()->isXnackOnOrAny(),
+ EnableWavefrontSize32, NextFreeVGPR,
VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
SGPRBlocks))
return true;
getTargetStreamer().EmitAmdhsaKernelDescriptor(
getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
- ReserveFlatScr, ReserveXNACK);
+ ReserveFlatScr);
return false;
}
// targeted GPU.
if (isToken(AsmToken::EndOfStatement)) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
- getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
- ISA.Stepping,
- "AMD", "AMDGPU");
+ getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
+ ISA.Stepping,
+ "AMD", "AMDGPU");
return false;
}
if (!parseString(ArchName, "invalid arch name"))
return true;
- getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
- VendorName, ArchName);
+ getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
+ VendorName, ArchName);
return false;
}
"architectures");
}
- auto ISAVersionStringFromASM = getToken().getStringContents();
-
- std::string ISAVersionStringFromSTI;
- raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
- IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
+ auto TargetIDDirective = getLexer().getTok().getStringContents();
+ if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
+ return Error(getParser().getTok().getLoc(), "target id must match options");
- if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
- return Error(getLoc(),
- ".amd_amdgpu_isa directive does not match triple and/or mcpu "
- "arguments specified through the command line");
- }
-
- getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
+ getTargetStreamer().EmitISAVersion();
Lex();
return false;
const char *AssemblerDirectiveBegin;
const char *AssemblerDirectiveEnd;
std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
- isHsaAbiVersion3(&getSTI())
+ isHsaAbiVersion3Or4(&getSTI())
? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
HSAMD::V3::AssemblerDirectiveEnd)
: std::make_tuple(HSAMD::AssemblerDirectiveBegin,
HSAMetadataString))
return true;
- if (isHsaAbiVersion3(&getSTI())) {
+ if (isHsaAbiVersion3Or4(&getSTI())) {
if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
return Error(getLoc(), "invalid HSA metadata");
} else {
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
- if (isHsaAbiVersion3(&getSTI())) {
- if (IDVal == ".amdgcn_target")
- return ParseDirectiveAMDGCNTarget();
-
+ if (isHsaAbiVersion3Or4(&getSTI())) {
if (IDVal == ".amdhsa_kernel")
- return ParseDirectiveAMDHSAKernel();
+ return ParseDirectiveAMDHSAKernel();
// TODO: Restructure/combine with PAL metadata directive.
if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
return ParseDirectiveHSAMetadata();
}
+ if (IDVal == ".amdgcn_target")
+ return ParseDirectiveAMDGCNTarget();
+
if (IDVal == ".amdgpu_lds")
return ParseDirectiveAMDGPULDS();
}
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
- unsigned RegNo) const {
+ unsigned RegNo) {
for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
R.isValid(); ++R) {
case AMDGPU::XNACK_MASK:
case AMDGPU::XNACK_MASK_LO:
case AMDGPU::XNACK_MASK_HI:
- return (isVI() || isGFX9()) && hasXNACK();
+ return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
case AMDGPU::SGPR_NULL:
return isGFX10Plus();
default:
{"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};
+// Hook run at the beginning of the input. It is a no-op when the streamer has
+// no target streamer or when the triple's architecture is r600. Otherwise it
+// makes sure the target streamer owns a target ID (initialized from the
+// subtarget and its feature string when none exists yet) and, for HSA ABI
+// version 3 or 4, asks the target streamer to emit the amdgcn target directive.
+void AMDGPUAsmParser::onBeginOfFile() {
+  if (!getParser().getStreamer().getTargetStreamer())
+    return;
+  if (getSTI().getTargetTriple().getArch() == Triple::r600)
+    return;
+
+  auto &TS = getTargetStreamer();
+  const auto &SubtargetInfo = getSTI();
+
+  // Only initialize once; an already populated target ID is left alone.
+  if (!TS.getTargetID())
+    TS.initializeTargetID(SubtargetInfo, SubtargetInfo.getFeatureString());
+
+  if (isHsaAbiVersion3Or4(&SubtargetInfo))
+    TS.EmitDirectiveAMDGCNTarget();
+}
+
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
OperandMatchResultTy res = parseOptionalOpr(Operands);
uint16_t TwoByteBuffer = 0;
uint32_t FourByteBuffer = 0;
- uint64_t EightByteBuffer = 0;
StringRef ReservedBytes;
StringRef Indent = "\t";
<< FourByteBuffer << '\n';
return MCDisassembler::Success;
+ case amdhsa::KERNARG_SIZE_OFFSET:
+ FourByteBuffer = DE.getU32(Cursor);
+ KdStream << Indent << ".amdhsa_kernarg_size "
+ << FourByteBuffer << '\n';
+ return MCDisassembler::Success;
+
case amdhsa::RESERVED0_OFFSET:
- // 8 reserved bytes, must be 0.
- EightByteBuffer = DE.getU64(Cursor);
- if (EightByteBuffer) {
- return MCDisassembler::Fail;
+ // 4 reserved bytes, must be 0.
+ ReservedBytes = DE.getBytes(Cursor, 4);
+ for (int I = 0; I < 4; ++I) {
+ if (ReservedBytes[I] != 0) {
+ return MCDisassembler::Fail;
+ }
}
return MCDisassembler::Success;
using AMDGPUSubtarget::getMaxWavesPerEU;
public:
- enum TrapHandlerAbi {
- TrapHandlerAbiNone = 0,
- TrapHandlerAbiHsa = 1
+ // Following 2 enums are documented at:
+ // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
+ enum class TrapHandlerAbi {
+ NONE = 0x00,
+ AMDHSA = 0x01,
};
- enum TrapID {
- TrapIDHardwareReserved = 0,
- TrapIDHSADebugTrap = 1,
- TrapIDLLVMTrap = 2,
- TrapIDLLVMDebugTrap = 3,
- TrapIDDebugBreakpoint = 7,
- TrapIDDebugReserved8 = 8,
- TrapIDDebugReservedFE = 0xfe,
- TrapIDDebugReservedFF = 0xff
- };
-
- enum TrapRegValues {
- LLVMTrapHandlerRegValue = 1
+ enum class TrapID {
+ LLVMAMDHSATrap = 0x02,
+ LLVMAMDHSADebugTrap = 0x03,
};
private:
return RegBankInfo.get();
}
+ /// \returns A read-only reference to the TargetID member of this subtarget.
+ const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const {
+ return TargetID;
+ }
+
// Nothing implemented, just prevent crashes on use.
const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
TrapHandlerAbi getTrapHandlerAbi() const {
- return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
+ return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE;
+ }
+
+ bool supportsGetDoorbellID() const {
+ // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
+ return getGeneration() >= GFX9;
}
/// True if the offset field of DS instructions works as expected. On SI, the
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//
+// Adjusts an ISA version triple in place: for major 9, minor 0 and a stepping
+// of 0, 2, 4 or 6, the stepping is incremented by one when \p Xnack is set.
+// Every other input is left untouched. \p Sramecc is accepted but not
+// consulted here.
+static void convertIsaVersionV2(uint32_t &Major, uint32_t &Minor,
+                                uint32_t &Stepping, bool Sramecc, bool Xnack) {
+  if (!Xnack || Major != 9 || Minor != 0)
+    return;
+
+  const bool IsEvenBaseStepping =
+      Stepping == 0 || Stepping == 2 || Stepping == 4 || Stepping == 6;
+  if (IsEvenBaseStepping)
+    ++Stepping;
+}
+
bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
HSAMD::Metadata HSAMetadata;
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
getPALMetadata()->reset();
}
-void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
- OS << "\t.amdgcn_target \"" << Target << "\"\n";
+void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
+ OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
}
void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
}
void
-AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
- uint32_t Minor,
- uint32_t Stepping,
- StringRef VendorName,
- StringRef ArchName) {
- OS << "\t.hsa_code_object_isa " <<
- Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
- ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
-
+AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
+ uint32_t Minor,
+ uint32_t Stepping,
+ StringRef VendorName,
+ StringRef ArchName) {
+ convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny());
+ OS << "\t.hsa_code_object_isa " << Twine(Major) << "," << Twine(Minor) << ","
+ << Twine(Stepping) << ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
}
void
<< Alignment.value() << '\n';
}
-bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
- OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n";
+bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
+ OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
return true;
}
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
- bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
+ bool ReserveVCC, bool ReserveFlatScr) {
IsaVersion IVersion = getIsaVersion(STI.getCPU());
OS << "\t.amdhsa_kernel " << KernelName << '\n';
<< '\n';
OS << "\t\t.amdhsa_private_segment_fixed_size "
<< KD.private_segment_fixed_size << '\n';
+ OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';
PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
kernel_code_properties,
OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
if (IVersion.Major >= 7 && !ReserveFlatScr)
OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
- if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
- OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
+
+ if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
+ switch (*HsaAbiVer) {
+ default:
+ break;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
+ break;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ if (getTargetID()->isXnackSupported())
+ OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
+ break;
+ }
+ }
PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
compute_pgm_rsrc1,
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
const MCSubtargetInfo &STI)
- : AMDGPUTargetStreamer(S), Streamer(S), Os(STI.getTargetTriple().getOS()) {
- MCAssembler &MCA = getStreamer().getAssembler();
- unsigned EFlags = MCA.getELFHeaderEFlags();
-
- EFlags &= ~ELF::EF_AMDGPU_MACH;
- EFlags |= getElfMach(STI.getCPU());
-
- EFlags &= ~ELF::EF_AMDGPU_XNACK;
- if (AMDGPU::hasXNACK(STI))
- EFlags |= ELF::EF_AMDGPU_XNACK;
-
- EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC;
- if (AMDGPU::hasSRAMECC(STI))
- EFlags |= ELF::EF_AMDGPU_SRAM_ECC;
-
- MCA.setELFHeaderEFlags(EFlags);
-}
+ : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
// We use it for emitting the accumulated PAL metadata as a .note record.
// The PAL metadata is reset after it is emitted.
void AMDGPUTargetELFStreamer::finish() {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCA.setELFHeaderEFlags(getEFlags());
+
std::string Blob;
const char *Vendor = getPALMetadata()->getVendor();
unsigned Type = getPALMetadata()->getType();
unsigned NoteFlags = 0;
// TODO Apparently, this is currently needed for OpenCL as mentioned in
// https://reviews.llvm.org/D74995
- if (Os == Triple::AMDHSA)
+ if (STI.getTargetTriple().getOS() == Triple::AMDHSA)
NoteFlags = ELF::SHF_ALLOC;
S.PushSection();
S.PopSection();
}
-void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
+// Selects the e_flags computation matching the triple's architecture. Only
+// r600 and amdgcn are handled; any other architecture is unreachable.
+unsigned AMDGPUTargetELFStreamer::getEFlags() {
+  const Triple::ArchType Arch = STI.getTargetTriple().getArch();
+
+  if (Arch == Triple::r600)
+    return getEFlagsR600();
+  if (Arch == Triple::amdgcn)
+    return getEFlagsAMDGCN();
+
+  llvm_unreachable("Unsupported Arch");
+}
+
+// Flags for r600 triples: just the value getElfMach() yields for the
+// subtarget's CPU name. Asserts that the triple's architecture is r600.
+unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
+ assert(STI.getTargetTriple().getArch() == Triple::r600);
+
+ return getElfMach(STI.getCPU());
+}
+
+// Flags for amdgcn triples, chosen by the triple's OS. AMDHSA, AMDPAL and
+// Mesa3D have dedicated helpers; every other OS value (including UnknownOS)
+// is handled by getEFlagsUnknownOS().
+unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
+  assert(STI.getTargetTriple().getArch() == Triple::amdgcn);
+
+  const Triple::OSType OS = STI.getTargetTriple().getOS();
+  if (OS == Triple::AMDHSA)
+    return getEFlagsAMDHSA();
+  if (OS == Triple::AMDPAL)
+    return getEFlagsAMDPAL();
+  if (OS == Triple::Mesa3D)
+    return getEFlagsMesa3D();
+
+  // TODO: Why do some tests have "mingw" listed as OS? Until that is
+  // resolved, unrecognized OS values are treated like Triple::UnknownOS
+  // instead of reaching llvm_unreachable("Unsupported OS").
+  return getEFlagsUnknownOS();
+}
+
+// Flags used when the OS has no dedicated helper: identical to getEFlagsV3().
+unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
+ // TODO: Why do some tests have "mingw" listed as OS? The assertion below
+ // stays disabled until that is resolved.
+ // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);
+
+ return getEFlagsV3();
+}
+
+// Flags for the AMDHSA OS, chosen by HSA ABI version: V2 and V3 share the V3
+// flag layout, V4 uses the V4 layout. A missing or unrecognized ABI version is
+// unreachable.
+unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
+  assert(STI.getTargetTriple().getOS() == Triple::AMDHSA);
+
+  Optional<uint8_t> AbiVersion = getHsaAbiVersion(&STI);
+  if (AbiVersion) {
+    const uint8_t Version = *AbiVersion;
+    if (Version == ELF::ELFABIVERSION_AMDGPU_HSA_V4)
+      return getEFlagsV4();
+    if (Version == ELF::ELFABIVERSION_AMDGPU_HSA_V2 ||
+        Version == ELF::ELFABIVERSION_AMDGPU_HSA_V3)
+      return getEFlagsV3();
+  }
+
+  llvm_unreachable("HSA OS ABI Version identification must be defined");
+}
+
+// Flags for the AMDPAL OS: identical to getEFlagsV3(). Asserts that the
+// triple's OS is AMDPAL.
+unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
+ assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);
+
+ return getEFlagsV3();
+}
+
+// Flags for the Mesa3D OS: identical to getEFlagsV3(). Asserts that the
+// triple's OS is Mesa3D.
+unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
+ assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);
+
+ return getEFlagsV3();
+}
+
+// Builds the V3-style flags: the mach value for the subtarget's CPU, plus one
+// bit each for xnack and sramecc when the target ID reports the feature as
+// "on or any".
+unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
+  // mach.
+  unsigned Flags = getElfMach(STI.getCPU());
+
+  // xnack.
+  const bool XnackOnOrAny = getTargetID()->isXnackOnOrAny();
+  if (XnackOnOrAny)
+    Flags |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
+
+  // sramecc.
+  const bool SramEccOnOrAny = getTargetID()->isSramEccOnOrAny();
+  if (SramEccOnOrAny)
+    Flags |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;
+
+  return Flags;
+}
+
+// Builds the V4-style flags: the mach value for the subtarget's CPU, plus one
+// flag for the xnack setting and one for the sramecc setting. Each setting
+// (Unsupported / Any / Off / On) maps to its own flag value.
+unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
+  using AMDGPU::IsaInfo::TargetIDSetting;
+
+  // Picks the flag that corresponds to Setting; contributes no bits for a
+  // value outside the four enumerators.
+  auto SelectFlag = [](TargetIDSetting Setting, unsigned UnsupportedFlag,
+                       unsigned AnyFlag, unsigned OffFlag,
+                       unsigned OnFlag) -> unsigned {
+    switch (Setting) {
+    case TargetIDSetting::Unsupported:
+      return UnsupportedFlag;
+    case TargetIDSetting::Any:
+      return AnyFlag;
+    case TargetIDSetting::Off:
+      return OffFlag;
+    case TargetIDSetting::On:
+      return OnFlag;
+    }
+    return 0;
+  };
+
+  // mach.
+  unsigned Flags = getElfMach(STI.getCPU());
+
+  // xnack.
+  Flags |= SelectFlag(getTargetID()->getXnackSetting(),
+                      ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4,
+                      ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4,
+                      ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4,
+                      ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4);
+
+  // sramecc.
+  Flags |= SelectFlag(getTargetID()->getSramEccSetting(),
+                      ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4,
+                      ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4,
+                      ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4,
+                      ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4);
+
+  return Flags;
+}
+
+// Intentionally empty: the ELF streamer emits nothing for this directive.
+void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
uint32_t Major, uint32_t Minor) {
EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
- ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
+ ELF::NT_AMD_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
OS.emitInt32(Major);
OS.emitInt32(Minor);
});
}
void
-AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
- uint32_t Minor,
- uint32_t Stepping,
- StringRef VendorName,
- StringRef ArchName) {
+AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
+ uint32_t Minor,
+ uint32_t Stepping,
+ StringRef VendorName,
+ StringRef ArchName) {
uint16_t VendorNameSize = VendorName.size() + 1;
uint16_t ArchNameSize = ArchName.size() + 1;
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
VendorNameSize + ArchNameSize;
+ convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny());
EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
- ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
+ ELF::NT_AMD_HSA_ISA_VERSION, [&](MCELFStreamer &OS) {
OS.emitInt16(VendorNameSize);
OS.emitInt16(ArchNameSize);
OS.emitInt32(Major);
SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
}
-bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
+bool AMDGPUTargetELFStreamer::EmitISAVersion() {
// Create two labels to mark the beginning and end of the desc field
// and a MCExpr to calculate the size of the desc field.
auto &Context = getContext();
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
- EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
+ EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME,
[&](MCELFStreamer &OS) {
OS.emitLabel(DescBegin);
- OS.emitBytes(IsaVersionString);
+ OS.emitBytes(getTargetID()->toString());
OS.emitLabel(DescEnd);
});
return true;
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
- EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
+ EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_METADATA,
[&](MCELFStreamer &OS) {
OS.emitLabel(DescBegin);
OS.emitBytes(HSAMetadataString);
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
- bool ReserveXNACK) {
+ uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {
auto &Streamer = getStreamer();
auto &Context = Streamer.getContext();
Streamer.emitLabel(KernelDescriptorSymbol);
Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size);
Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size);
+ Streamer.emitInt32(KernelDescriptor.kernarg_size);
+
for (uint8_t Res : KernelDescriptor.reserved0)
Streamer.emitInt8(Res);
+
// FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
// expression being created is:
// (start of kernel code) - (start of kernel descriptor)
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
+#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDGPUPALMetadata.h"
#include "llvm/MC/MCStreamer.h"
AMDGPUPALMetadata PALMetadata;
protected:
+ // TODO: Move HSAMetadataStream to AMDGPUTargetStreamer.
+ Optional<AMDGPU::IsaInfo::AMDGPUTargetID> TargetID;
+
MCContext &getContext() const { return Streamer.getContext(); }
public:
AMDGPUPALMetadata *getPALMetadata() { return &PALMetadata; }
- virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
+ virtual void EmitDirectiveAMDGCNTarget() = 0;
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) = 0;
- virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
- uint32_t Stepping,
- StringRef VendorName,
- StringRef ArchName) = 0;
+ virtual void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor,
+ uint32_t Stepping,
+ StringRef VendorName,
+ StringRef ArchName) = 0;
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
Align Alignment) = 0;
/// \returns True on success, false on failure.
- virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
+ virtual bool EmitISAVersion() = 0;
/// \returns True on success, false on failure.
virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString);
virtual void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
- bool ReserveXNACK) = 0;
+ uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) = 0;
static StringRef getArchNameFromElfMach(unsigned ElfMach);
static unsigned getElfMach(StringRef GPU);
+
+ /// \returns Read-only access to the optional target ID; it holds no value
+ /// until one of the initializeTargetID() overloads has been called.
+ const Optional<AMDGPU::IsaInfo::AMDGPUTargetID> &getTargetID() const {
+ return TargetID;
+ }
+ /// \returns Mutable access to the optional target ID; it holds no value
+ /// until one of the initializeTargetID() overloads has been called.
+ Optional<AMDGPU::IsaInfo::AMDGPUTargetID> &getTargetID() {
+ return TargetID;
+ }
+ /// Constructs the target ID in place from \p STI. Asserts that no target ID
+ /// has been set before, i.e. this may be called at most once.
+ void initializeTargetID(const MCSubtargetInfo &STI) {
+ assert(TargetID == None && "TargetID can only be initialized once");
+ TargetID.emplace(STI);
+ }
+ /// Constructs the target ID from \p STI (same once-only restriction as the
+ /// single-argument overload) and then updates it from \p FeatureString via
+ /// setTargetIDFromFeaturesString().
+ void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString) {
+ initializeTargetID(STI);
+
+ assert(getTargetID() != None && "TargetID is None");
+ getTargetID()->setTargetIDFromFeaturesString(FeatureString);
+ }
};
class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
void finish() override;
- void EmitDirectiveAMDGCNTarget(StringRef Target) override;
+ void EmitDirectiveAMDGCNTarget() override;
void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) override;
- void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
- uint32_t Stepping, StringRef VendorName,
- StringRef ArchName) override;
+ void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor,
+ uint32_t Stepping, StringRef VendorName,
+ StringRef ArchName) override;
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override;
/// \returns True on success, false on failure.
- bool EmitISAVersion(StringRef IsaVersionString) override;
+ bool EmitISAVersion() override;
/// \returns True on success, false on failure.
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override;
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
- bool ReserveXNACK) override;
+ uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override;
};
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
+ const MCSubtargetInfo &STI;
MCStreamer &Streamer;
- Triple::OSType Os;
void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType,
function_ref<void(MCELFStreamer &)> EmitDesc);
+ unsigned getEFlags();
+
+ unsigned getEFlagsR600();
+ unsigned getEFlagsAMDGCN();
+
+ unsigned getEFlagsUnknownOS();
+ unsigned getEFlagsAMDHSA();
+ unsigned getEFlagsAMDPAL();
+ unsigned getEFlagsMesa3D();
+
+ unsigned getEFlagsV3();
+ unsigned getEFlagsV4();
+
public:
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
void finish() override;
- void EmitDirectiveAMDGCNTarget(StringRef Target) override;
+ void EmitDirectiveAMDGCNTarget() override;
void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) override;
- void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
- uint32_t Stepping, StringRef VendorName,
- StringRef ArchName) override;
+ void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor,
+ uint32_t Stepping, StringRef VendorName,
+ StringRef ArchName) override;
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override;
/// \returns True on success, false on failure.
- bool EmitISAVersion(StringRef IsaVersionString) override;
+ bool EmitISAVersion() override;
/// \returns True on success, false on failure.
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override;
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
- bool ReserveXNACK) override;
+ uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override;
};
}
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
}
+// Picks the lowering for a trap:
+//  - trap handler disabled, or trap handler ABI is not AMDHSA:
+//    lowerTrapEndpgm();
+//  - HSA ABI V2 or V3: lowerTrapHsaQueuePtr();
+//  - HSA ABI V4: lowerTrapHsa() when the subtarget reports
+//    supportsGetDoorbellID(), otherwise lowerTrapHsaQueuePtr().
+// A missing or unrecognized HSA ABI version is unreachable.
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
+ if (!Subtarget->isTrapHandlerEnabled() ||
+ Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
+ return lowerTrapEndpgm(Op, DAG);
+
+ if (Optional<uint8_t> HsaAbiVer = AMDGPU::getHsaAbiVersion(Subtarget)) {
+ switch (*HsaAbiVer) {
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
+ return lowerTrapHsaQueuePtr(Op, DAG);
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ return Subtarget->supportsGetDoorbellID() ?
+ lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG);
+ }
+ }
+
+ llvm_unreachable("Unknown trap handler");
+}
+
+SDValue SITargetLowering::lowerTrapEndpgm(
+ SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
+ return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
+}
- if (Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
- !Subtarget->isTrapHandlerEnabled())
- return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
+SDValue SITargetLowering::lowerTrapHsaQueuePtr(
+ SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Chain = Op.getOperand(0);
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01,
QueuePtr, SDValue());
+
+ uint64_t TrapID = static_cast<uint64_t>(GCNSubtarget::TrapID::LLVMAMDHSATrap);
SDValue Ops[] = {
ToReg,
- DAG.getTargetConstant(GCNSubtarget::TrapIDLLVMTrap, SL, MVT::i16),
+ DAG.getTargetConstant(TrapID, SL, MVT::i16),
SGPR01,
ToReg.getValue(1)
};
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
}
+// Builds an AMDGPUISD::TRAP node whose operands are the incoming chain and the
+// LLVMAMDHSATrap trap ID as an i16 target constant.
+SDValue SITargetLowering::lowerTrapHsa(
+    SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+
+  const auto TrapIDValue =
+      static_cast<uint64_t>(GCNSubtarget::TrapID::LLVMAMDHSATrap);
+  SDValue TrapOperands[] = {
+    Op.getOperand(0),
+    DAG.getTargetConstant(TrapIDValue, DL, MVT::i16)
+  };
+
+  return DAG.getNode(AMDGPUISD::TRAP, DL, MVT::Other, TrapOperands);
+}
+
SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
MachineFunction &MF = DAG.getMachineFunction();
- if (Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
- !Subtarget->isTrapHandlerEnabled()) {
+ if (!Subtarget->isTrapHandlerEnabled() ||
+ Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
DiagnosticInfoUnsupported NoTrap(MF.getFunction(),
"debugtrap handler not supported",
Op.getDebugLoc(),
return Chain;
}
+ uint64_t TrapID = static_cast<uint64_t>(GCNSubtarget::TrapID::LLVMAMDHSADebugTrap);
SDValue Ops[] = {
Chain,
- DAG.getTargetConstant(GCNSubtarget::TrapIDLLVMDebugTrap, SL, MVT::i16)
+ DAG.getTargetConstant(TrapID, SL, MVT::i16)
};
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
}
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+
SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerTrapEndpgm(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
int NONE = 0;
}
-def TRAPID{
- int LLVM_TRAP = 2;
- int LLVM_DEBUG_TRAP = 3;
-}
-
def HWREG {
int MODE = 1;
int STATUS = 2;
static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion(
"amdhsa-code-object-version", llvm::cl::Hidden,
- llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(3));
+ llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4),
+ llvm::cl::ZeroOrMore);
namespace {
return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
case 3:
return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
+ case 4:
+ return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
default:
- return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
+ report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
+ Twine(AmdhsaCodeObjectVersion));
}
}
bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
- if (const auto &&HsaAbiVer = getHsaAbiVersion(STI))
- return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
+ if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
+ return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
return false;
}
bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
- if (const auto &&HsaAbiVer = getHsaAbiVersion(STI))
- return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
+ if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
+ return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
+ return false;
+}
+
+bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
+ if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
+ return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
return false;
}
+// True when the HSA ABI version for \p STI is 3 or 4; version 4 is only
+// queried when the version-3 check fails.
+bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
+  if (isHsaAbiVersion3(STI))
+    return true;
+  return isHsaAbiVersion4(STI);
+}
+
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
namespace IsaInfo {
AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
- : XnackSetting(TargetIDSetting::Any), SramEccSetting(TargetIDSetting::Any) {
+ : STI(STI), XnackSetting(TargetIDSetting::Any),
+ SramEccSetting(TargetIDSetting::Any) {
if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
XnackSetting = TargetIDSetting::Unsupported;
if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
}
}
-void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
- auto TargetTriple = STI->getTargetTriple();
- auto Version = getIsaVersion(STI->getCPU());
+std::string AMDGPUTargetID::toString() const {
+ std::string StringRep = "";
+ raw_string_ostream StreamRep(StringRep);
+
+ auto TargetTriple = STI.getTargetTriple();
+ auto Version = getIsaVersion(STI.getCPU());
- Stream << TargetTriple.getArchName() << '-'
- << TargetTriple.getVendorName() << '-'
- << TargetTriple.getOSName() << '-'
- << TargetTriple.getEnvironmentName() << '-'
- << "gfx"
- << Version.Major
- << Version.Minor
- << hexdigit(Version.Stepping, true);
+ StreamRep << TargetTriple.getArchName() << '-'
+ << TargetTriple.getVendorName() << '-'
+ << TargetTriple.getOSName() << '-'
+ << TargetTriple.getEnvironmentName() << '-';
+
+ std::string Processor = "";
+ // TODO: Following else statement is present here because we used various
+ // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
+ // Remove once all aliases are removed from GCNProcessors.td.
+ if (Version.Major >= 9)
+ Processor = STI.getCPU().str();
+ else
+ Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
+ Twine(Version.Stepping))
+ .str();
+
+ std::string Features = "";
+ if (Optional<uint8_t> HsaAbiVersion = getHsaAbiVersion(&STI)) {
+ switch (*HsaAbiVersion) {
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
+ // Code object V2 only supported specific processors and had fixed
+ // settings for the XNACK.
+ if (Processor == "gfx600") {
+ } else if (Processor == "gfx601") {
+ } else if (Processor == "gfx602") {
+ } else if (Processor == "gfx700") {
+ } else if (Processor == "gfx701") {
+ } else if (Processor == "gfx702") {
+ } else if (Processor == "gfx703") {
+ } else if (Processor == "gfx704") {
+ } else if (Processor == "gfx705") {
+ } else if (Processor == "gfx801") {
+ if (!isXnackOnOrAny())
+ report_fatal_error(
+ "AMD GPU code object V2 does not support processor " + Processor +
+ " without XNACK");
+ } else if (Processor == "gfx802") {
+ } else if (Processor == "gfx803") {
+ } else if (Processor == "gfx805") {
+ } else if (Processor == "gfx810") {
+ if (!isXnackOnOrAny())
+ report_fatal_error(
+ "AMD GPU code object V2 does not support processor " + Processor +
+ " without XNACK");
+ } else if (Processor == "gfx900") {
+ if (isXnackOnOrAny())
+ Processor = "gfx901";
+ } else if (Processor == "gfx902") {
+ if (isXnackOnOrAny())
+ Processor = "gfx903";
+ } else if (Processor == "gfx904") {
+ if (isXnackOnOrAny())
+ Processor = "gfx905";
+ } else if (Processor == "gfx906") {
+ if (isXnackOnOrAny())
+ Processor = "gfx907";
+ } else {
+ report_fatal_error(
+ "AMD GPU code object V2 does not support processor " + Processor);
+ }
+ break;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
+ // xnack.
+ if (isXnackOnOrAny())
+ Features += "+xnack";
+ // In code object v2 and v3, "sramecc" feature was spelled with a
+ // hyphen ("sram-ecc").
+ if (isSramEccOnOrAny())
+ Features += "+sram-ecc";
+ break;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ // sramecc.
+ if (getSramEccSetting() == TargetIDSetting::Off)
+ Features += ":sramecc-";
+ else if (getSramEccSetting() == TargetIDSetting::On)
+ Features += ":sramecc+";
+ // xnack.
+ if (getXnackSetting() == TargetIDSetting::Off)
+ Features += ":xnack-";
+ else if (getXnackSetting() == TargetIDSetting::On)
+ Features += ":xnack+";
+ break;
+ default:
+ break;
+ }
+ }
- if (hasXNACK(*STI))
- Stream << "+xnack";
- if (hasSRAMECC(*STI))
- Stream << "+sramecc";
+ StreamRep << Processor << Features;
- Stream.flush();
+ StreamRep.flush();
+ return StringRep;
}
unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
+/// \returns True if HSA OS ABI Version identification is 4,
+/// false otherwise.
+bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
+/// \returns True if HSA OS ABI Version identification is 3 or 4,
+/// false otherwise.
+bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI);
struct GcnBufferFormatInfo {
unsigned Format;
class AMDGPUTargetID {
private:
+ const MCSubtargetInfo &STI;
TargetIDSetting XnackSetting;
TargetIDSetting SramEccSetting;
void setTargetIDFromFeaturesString(StringRef FS);
void setTargetIDFromTargetIDStream(StringRef TargetID);
-};
-/// Streams isa version string for given subtarget \p STI into \p Stream.
-void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
+ /// \returns String representation of an object.
+ std::string toString() const;
+};
/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);
}
return;
}
- BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
+ BlobType = ELF::NT_AMD_PAL_METADATA;
NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
if (!NamedMD || !NamedMD->getNumOperands()) {
// Emit msgpack metadata by default
// Metadata.
bool AMDGPUPALMetadata::setFromBlob(unsigned Type, StringRef Blob) {
BlobType = Type;
- if (Type == ELF::NT_AMD_AMDGPU_PAL_METADATA)
+ if (Type == ELF::NT_AMD_PAL_METADATA)
return setFromLegacyBlob(Blob);
return setFromMsgPackBlob(Blob);
}
// a .note record of the specified AMD type. Returns an empty blob if
// there is no PAL metadata.
void AMDGPUPALMetadata::toBlob(unsigned Type, std::string &Blob) {
- if (Type == ELF::NT_AMD_AMDGPU_PAL_METADATA)
+ if (Type == ELF::NT_AMD_PAL_METADATA)
toLegacyBlob(Blob);
else if (Type)
toMsgPackBlob(Blob);
}
// Get .note record type of metadata blob to be emitted:
-// ELF::NT_AMD_AMDGPU_PAL_METADATA (legacy key=val format), or
+// ELF::NT_AMD_PAL_METADATA (legacy key=val format), or
// ELF::NT_AMDGPU_METADATA (MsgPack format), or
// 0 (no PAL metadata).
unsigned AMDGPUPALMetadata::getType() const {
// Return whether the blob type is legacy PAL metadata.
bool AMDGPUPALMetadata::isLegacy() const {
- return BlobType == ELF::NT_AMD_AMDGPU_PAL_METADATA;
+ return BlobType == ELF::NT_AMD_PAL_METADATA;
}
// Set legacy PAL metadata format.
void AMDGPUPALMetadata::setLegacy() {
- BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
+ BlobType = ELF::NT_AMD_PAL_METADATA;
}
// Erase all PAL metadata.
const char *getVendor() const;
// Get .note record type of metadata blob to be emitted:
- // ELF::NT_AMD_AMDGPU_PAL_METADATA (legacy key=val format), or
+ // ELF::NT_AMD_PAL_METADATA (legacy key=val format), or
// ELF::NT_AMDGPU_METADATA (MsgPack format), or
// 0 (no PAL metadata).
unsigned getType() const;
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX9-NEXT: s_trap 2
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-LABEL: func_use_lds_global_constexpr_cast:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX9-NEXT: s_trap 2
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK-LABEL: {{^}}min_64_max_64:
; CHECK: SGPRBlocks: 0
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=HSAMD %s
; CHECK-LABEL: {{^}}min_64_max_64:
}
attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}
-; HSAMD: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
+; HSAMD: NT_AMD_HSA_METADATA (AMD HSA Metadata)
; HSAMD: Version: [ 1, 0 ]
; HSAMD: Kernels:
; HSAMD: - Name: min_64_max_64
# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
-# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN %s
---
# Trivial clause at beginning of program
-# RUN: llc -march=amdgcn -mcpu=tonga -run-pass post-RA-sched -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=tonga -mattr=-xnack -run-pass post-RA-sched -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
# GCN: FLAT_LOAD_DWORD
# GCN-NEXT: FLAT_LOAD_DWORD
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx600 < %s | FileCheck --check-prefixes=V3-GFX600 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=tahiti < %s | FileCheck --check-prefixes=V3-GFX600 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx601 < %s | FileCheck --check-prefixes=V3-GFX601 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=pitcairn < %s | FileCheck --check-prefixes=V3-GFX601 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=verde < %s | FileCheck --check-prefixes=V3-GFX601 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx602 < %s | FileCheck --check-prefixes=V3-GFX602 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=hainan < %s | FileCheck --check-prefixes=V3-GFX602 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=oland < %s | FileCheck --check-prefixes=V3-GFX602 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx700 < %s | FileCheck --check-prefixes=V3-GFX700 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=kaveri < %s | FileCheck --check-prefixes=V3-GFX700 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx701 < %s | FileCheck --check-prefixes=V3-GFX701 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=hawaii < %s | FileCheck --check-prefixes=V3-GFX701 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx702 < %s | FileCheck --check-prefixes=V3-GFX702 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx703 < %s | FileCheck --check-prefixes=V3-GFX703 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=kabini < %s | FileCheck --check-prefixes=V3-GFX703 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=mullins < %s | FileCheck --check-prefixes=V3-GFX703 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx704 < %s | FileCheck --check-prefixes=V3-GFX704 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=bonaire < %s | FileCheck --check-prefixes=V3-GFX704 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx705 < %s | FileCheck --check-prefixes=V3-GFX705 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx802 < %s | FileCheck --check-prefixes=V3-GFX802 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=iceland < %s | FileCheck --check-prefixes=V3-GFX802 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=tonga < %s | FileCheck --check-prefixes=V3-GFX802 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx803 < %s | FileCheck --check-prefixes=V3-GFX803 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=fiji < %s | FileCheck --check-prefixes=V3-GFX803 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=polaris10 < %s | FileCheck --check-prefixes=V3-GFX803 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=polaris11 < %s | FileCheck --check-prefixes=V3-GFX803 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx805 < %s | FileCheck --check-prefixes=V3-GFX805 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=tongapro < %s | FileCheck --check-prefixes=V3-GFX805 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx810 < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx810 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx810 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=stoney < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=stoney -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=stoney -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX900-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx902 < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX902-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx902 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx904 < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx904 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX904-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx909 < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx909 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX909-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx909 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX90C-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1010-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1011 < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1011 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1011-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1011 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1012-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1030 < %s | FileCheck --check-prefixes=V3-GFX1030 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1031 < %s | FileCheck --check-prefixes=V3-GFX1031 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1032 < %s | FileCheck --check-prefixes=V3-GFX1032 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1033 < %s | FileCheck --check-prefixes=V3-GFX1033 %s
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX600 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck --check-prefixes=GFX600 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx601 < %s | FileCheck --check-prefixes=GFX601 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX704 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire < %s | FileCheck --check-prefixes=GFX704 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx705 < %s | FileCheck --check-prefixes=GFX705 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 < %s | FileCheck --check-prefixes=GFX801 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo < %s | FileCheck --check-prefixes=GFX801 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck --check-prefixes=GFX801 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX801-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX801-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo < %s | FileCheck --check-prefixes=GFX801 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=-xnack < %s | FileCheck --check-prefixes=GFX801-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=+xnack < %s | FileCheck --check-prefixes=GFX801-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 < %s | FileCheck --check-prefixes=GFX802 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland < %s | FileCheck --check-prefixes=GFX802 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga < %s | FileCheck --check-prefixes=GFX802 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx805 < %s | FileCheck --check-prefixes=GFX805 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tongapro < %s | FileCheck --check-prefixes=GFX805 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 < %s | FileCheck --check-prefixes=GFX810 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX810-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX810-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney < %s | FileCheck --check-prefixes=GFX810 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=-xnack < %s | FileCheck --check-prefixes=GFX810-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=+xnack < %s | FileCheck --check-prefixes=GFX810-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX900-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX900-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 < %s | FileCheck --check-prefixes=GFX902 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX902-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX902-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 < %s | FileCheck --check-prefixes=GFX904 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX904-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX904-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck --check-prefixes=GFX906 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck --check-prefixes=GFX906-NOSRAMECC %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=GFX906-SRAMECC %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX906-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX906-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=GFX906-NOSRAMECC-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=GFX906-SRAMECC-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=GFX906-NOSRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=GFX906-SRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX908 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc < %s | FileCheck --check-prefixes=GFX908-NOSRAMECC %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc < %s | FileCheck --check-prefixes=GFX908-SRAMECC %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX908-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX908-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=GFX908-NOSRAMECC-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=GFX908-SRAMECC-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=GFX908-NOSRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=GFX908-SRAMECC-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 < %s | FileCheck --check-prefixes=GFX909 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX909-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX909-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s | FileCheck --check-prefixes=GFX90C %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=GFX90C-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=GFX90C-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 < %s | FileCheck --check-prefixes=GFX1011 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1011-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1011-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 < %s | FileCheck --check-prefixes=GFX1012 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1012-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1012-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck --check-prefixes=GFX1030 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX1031 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1032 < %s | FileCheck --check-prefixes=GFX1032 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1033 < %s | FileCheck --check-prefixes=GFX1033 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=XNACK-GFX900 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=NO-XNACK-GFX902 %s
-
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+sramecc < %s | FileCheck --check-prefixes=SRAM-ECC-GFX904 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s
-
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX904 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX906 %s
-
-; FIXME: With the default attributes these directives are not accurate for
-; xnack and sramecc. Subsequent Target-ID patches will address this.
+; V3-GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
+; V3-GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
+; V3-GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602"
+; V3-GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
+; V3-GFX701: .amdgcn_target "amdgcn-amd-amdhsa--gfx701"
+; V3-GFX702: .amdgcn_target "amdgcn-amd-amdhsa--gfx702"
+; V3-GFX703: .amdgcn_target "amdgcn-amd-amdhsa--gfx703"
+; V3-GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704"
+; V3-GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705"
+; V3-GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801"
+; V3-GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801+xnack"
+; V3-GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802"
+; V3-GFX803: .amdgcn_target "amdgcn-amd-amdhsa--gfx803"
+; V3-GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805"
+; V3-GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810"
+; V3-GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack"
+; V3-GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
+; V3-GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack"
+; V3-GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902"
+; V3-GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack"
+; V3-GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904"
+; V3-GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
+; V3-GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906"
+; V3-GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc"
+; V3-GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack"
+; V3-GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc"
+; V3-GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908"
+; V3-GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+sram-ecc"
+; V3-GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack"
+; V3-GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack+sram-ecc"
+; V3-GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909"
+; V3-GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909+xnack"
+; V3-GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c"
+; V3-GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c+xnack"
+; V3-GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
+; V3-GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
+; V3-GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011"
+; V3-GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011+xnack"
+; V3-GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012"
+; V3-GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012+xnack"
+; V3-GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030"
+; V3-GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031"
+; V3-GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032"
+; V3-GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033"
; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
; GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704"
; GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705"
; GFX801: .amdgcn_target "amdgcn-amd-amdhsa--gfx801"
+; GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801:xnack-"
+; GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801:xnack+"
; GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802"
; GFX803: .amdgcn_target "amdgcn-amd-amdhsa--gfx803"
; GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805"
; GFX810: .amdgcn_target "amdgcn-amd-amdhsa--gfx810"
+; GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack-"
+; GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack+"
; GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
+; GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
+; GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902"
+; GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902:xnack-"
+; GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902:xnack+"
; GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904"
+; GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack-"
+; GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+"
; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906"
-
-; XNACK-GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack"
-; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902"
-
-; SRAM-ECC-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+sramecc"
-; SRAM-ECC-GFX906: "amdgcn-amd-amdhsa--gfx906+sramecc"
-
-; SRAM-ECC-XNACK-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack+sramecc"
-; SRAM-ECC-XNACK-GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sramecc"
+; GFX906-NOSRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc-"
+; GFX906-SRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc+"
+; GFX906-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:xnack-"
+; GFX906-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:xnack+"
+; GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc-:xnack-"
+; GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc+:xnack-"
+; GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc-:xnack+"
+; GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc+:xnack+"
+; GFX908: .amdgcn_target "amdgcn-amd-amdhsa--gfx908"
+; GFX908-NOSRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc-"
+; GFX908-SRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc+"
+; GFX908-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:xnack-"
+; GFX908-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:xnack+"
+; GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc-:xnack-"
+; GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc+:xnack-"
+; GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc-:xnack+"
+; GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc+:xnack+"
+; GFX909: .amdgcn_target "amdgcn-amd-amdhsa--gfx909"
+; GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909:xnack-"
+; GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909:xnack+"
+; GFX90C: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c"
+; GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack-"
+; GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack+"
+; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
+; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-"
+; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+"
+; GFX1011: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011"
+; GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011:xnack-"
+; GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011:xnack+"
+; GFX1012: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012"
+; GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012:xnack-"
+; GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012:xnack+"
+; GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030"
+; GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031"
+; GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032"
+; GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033"
define amdgpu_kernel void @directive_amdgcn_target() {
ret void
-; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=-sramecc < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sramecc < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX906 %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx908 < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=SRAM-ECC-GFX908 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx908 -mattr=+sramecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=SRAM-ECC-GFX908 %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90a < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=SRAM-ECC-GFX90A %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90a -mattr=+sramecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=SRAM-ECC-GFX90A %s
+
; NO-SRAM-ECC-GFX906: Flags [
+; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; NO-SRAM-ECC-GFX906-NEXT: ]
; SRAM-ECC-GFX906: Flags [
+; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
+; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
-; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
; SRAM-ECC-GFX906-NEXT: ]
; SRAM-ECC-XNACK-GFX906: Flags [
+; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
+; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
-; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
-; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_XNACK (0x100)
; SRAM-ECC-XNACK-GFX906-NEXT: ]
-; SRAM-ECC-GFX908: Flags [ (0x230)
+; SRAM-ECC-GFX908: Flags [
+; SRAM-ECC-GFX908: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
; SRAM-ECC-GFX908: EF_AMDGPU_MACH_AMDGCN_GFX908 (0x30)
-; SRAM-ECC-GFX908: EF_AMDGPU_SRAM_ECC (0x200)
; SRAM-ECC-GFX908: ]
+; SRAM-ECC-GFX90A: Flags [
+; SRAM-ECC-GFX90A: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
+; SRAM-ECC-GFX90A: EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)
+; SRAM-ECC-GFX90A: ]
+
define amdgpu_kernel void @elf_header() {
ret void
}
-; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx801 -mattr=-xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-XNACK-GFX801 %s
-; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx802 -mattr=+xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=XNACK-GFX802 %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=XNACK-GFX801 %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx801 -mattr=+xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=XNACK-GFX801 %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx802 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-XNACK-GFX802 %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx802 -mattr=-xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-XNACK-GFX802 %s
-; NO-XNACK-GFX801: Flags [
-; NO-XNACK-GFX801-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX801 (0x28)
-; NO-XNACK-GFX801-NEXT: ]
+; XNACK-GFX801: Flags [
+; XNACK-GFX801-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
+; XNACK-GFX801-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX801 (0x28)
+; XNACK-GFX801-NEXT: ]
-; XNACK-GFX802: Flags [
-; XNACK-GFX802-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX802 (0x29)
-; XNACK-GFX802-NEXT: EF_AMDGPU_XNACK (0x100)
-; XNACK-GFX802-NEXT: ]
+; NO-XNACK-GFX802: Flags [
+; NO-XNACK-GFX802-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX802 (0x29)
+; NO-XNACK-GFX802-NEXT: ]
define amdgpu_kernel void @elf_header() {
ret void
; NONE: OS/ABI: SystemV (0x0)
; HSA: OS/ABI: AMDGPU_HSA (0x40)
-; HSA: ABIVersion: 1
+; HSA: ABIVersion: 2
; PAL: OS/ABI: AMDGPU_PAL (0x41)
; PAL: ABIVersion: 0
; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)
-; MESA3D: ABIVersion: 0
+; MESA3D: ABIVersion: 0
define amdgpu_kernel void @elf_header() {
ret void
; OSABI-UNK-NOT: .amd_amdgpu_pal_metadata
; OSABI-UNK-ELF-NOT: Unknown note type
-; OSABI-UNK-ELF: NT_AMD_AMDGPU_ISA (ISA Version)
-; OSABI-UNK-ELF: ISA Version:
+; OSABI-UNK-ELF: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
+; OSABI-UNK-ELF: AMD HSA ISA Name:
; OSABI-UNK-ELF: amdgcn-amd-unknown--gfx802
; OSABI-UNK-ELF-NOT: Unknown note type
-; OSABI-UNK-ELF-NOT: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
+; OSABI-UNK-ELF-NOT: NT_AMD_HSA_METADATA (AMD HSA Metadata)
; OSABI-UNK-ELF-NOT: Unknown note type
-; OSABI-UNK-ELF-NOT: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)
+; OSABI-UNK-ELF-NOT: NT_AMD_PAL_METADATA (AMD PAL Metadata)
; OSABI-UNK-ELF-NOT: Unknown note type
; OSABI-HSA: .hsa_code_object_version
; OSABI-HSA: .amd_amdgpu_hsa_metadata
; OSABI-HSA-NOT: .amd_amdgpu_pal_metadata
-; OSABI-HSA-ELF: Unknown note type: (0x00000001)
-; OSABI-HSA-ELF: Unknown note type: (0x00000003)
-; OSABI-HSA-ELF: NT_AMD_AMDGPU_ISA (ISA Version)
-; OSABI-HSA-ELF: ISA Version:
+; OSABI-HSA-ELF: NT_AMD_HSA_CODE_OBJECT_VERSION (AMD HSA Code Object Version)
+; OSABI-HSA-ELF: NT_AMD_HSA_ISA_VERSION (AMD HSA ISA Version)
+; OSABI-HSA-ELF: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
+; OSABI-HSA-ELF: AMD HSA ISA Name:
; OSABI-HSA-ELF: amdgcn-amd-amdhsa--gfx802
-; OSABI-HSA-ELF: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
+; OSABI-HSA-ELF: NT_AMD_HSA_METADATA (AMD HSA Metadata)
; OSABI-HSA-ELF: HSA Metadata:
; OSABI-HSA-ELF: ---
; OSABI-HSA-ELF: Version: [ 1, 0 ]
; OSABI-HSA-ELF: WavefrontSize: 64
; OSABI-HSA-ELF: NumSGPRs: 96
; OSABI-HSA-ELF: ...
-; OSABI-HSA-ELF-NOT: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)
+; OSABI-HSA-ELF-NOT: NT_AMD_PAL_METADATA (AMD PAL Metadata)
; OSABI-PAL-NOT: .hsa_code_object_version
; OSABI-PAL: .hsa_code_object_isa
; OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802"
; OSABI-PAL-NOT: .amd_amdgpu_hsa_metadata
-; OSABI-PAL-ELF: Unknown note type: (0x00000003)
-; OSABI-PAL-ELF: NT_AMD_AMDGPU_ISA (ISA Version)
-; OSABI-PAL-ELF: ISA Version:
+; OSABI-PAL-ELF: NT_AMD_HSA_ISA_VERSION (AMD HSA ISA Version)
+; OSABI-PAL-ELF: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
+; OSABI-PAL-ELF: AMD HSA ISA Name:
; OSABI-PAL-ELF: amdgcn-amd-amdpal--gfx802
-; OSABI-PAL-ELF-NOT: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
+; OSABI-PAL-ELF-NOT: NT_AMD_HSA_METADATA (AMD HSA Metadata)
; OSABI-PAL-ELF: NT_AMDGPU_METADATA (AMDGPU Metadata)
; OSABI-PAL-ELF: AMDGPU Metadata:
; OSABI-PAL-ELF: amdpal.pipelines:
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,GCN %s
+; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,GCN %s
; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-CI -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-VI-NOXNACK -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-VI-XNACK -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,HSA-CI-V2,GCN %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK-V2,GCN %s
+
+; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-CI,GCN %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s
; GCN-LABEL: {{^}}no_vcc_no_flat:
-; HSA-CI: is_xnack_enabled = 0
-; HSA-VI-NOXNACK: is_xnack_enabled = 0
-; HSA-VI-XNACK: is_xnack_enabled = 1
+; The V2 runs check is_xnack_enabled; the default-version HSA runs check .amdhsa_reserve_xnack_mask, which the kaveri run must not emit.
+; HSA-CI-V2: is_xnack_enabled = 0
+; HSA-VI-XNACK-V2: is_xnack_enabled = 1
+
+; HSA-CI-NOT: .amdhsa_reserve_xnack_mask
+; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
+; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
; CI: ; NumSgprs: 8
; VI-NOXNACK: ; NumSgprs: 8
}
; GCN-LABEL: {{^}}vcc_no_flat:
-; HSA-CI: is_xnack_enabled = 0
-; HSA-VI-NOXNACK: is_xnack_enabled = 0
-; HSA-VI-XNACK: is_xnack_enabled = 1
+
+; HSA-CI-V2: is_xnack_enabled = 0
+; HSA-VI-XNACK-V2: is_xnack_enabled = 1
+
+; HSA-CI-NOT: .amdhsa_reserve_xnack_mask
+; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
+; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
; CI: ; NumSgprs: 10
; VI-NOXNACK: ; NumSgprs: 10
}
; GCN-LABEL: {{^}}no_vcc_flat:
-; HSA-CI: is_xnack_enabled = 0
-; HSA-VI-NOXNACK: is_xnack_enabled = 0
-; HSA-VI-XNACK: is_xnack_enabled = 1
+
+; HSA-CI-V2: is_xnack_enabled = 0
+; HSA-VI-XNACK-V2: is_xnack_enabled = 1
+
+; HSA-CI-NOT: .amdhsa_reserve_xnack_mask
+; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
+; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
; CI: ; NumSgprs: 12
; VI-NOXNACK: ; NumSgprs: 14
; VI-XNACK: ; NumSgprs: 14
-; HSA-CI: ; NumSgprs: 12
-; HSA-VI-NOXNACK: ; NumSgprs: 14
-; HSA-VI-XNACK: ; NumSgprs: 14
define amdgpu_kernel void @no_vcc_flat() {
entry:
call void asm sideeffect "", "~{s7},~{flat_scratch}"()
}
; GCN-LABEL: {{^}}vcc_flat:
-; HSA-NOXNACK: is_xnack_enabled = 0
-; HSA-XNACK: is_xnack_enabled = 1
+
+; HSA-CI-V2: is_xnack_enabled = 0
+; HSA-VI-XNACK-V2: is_xnack_enabled = 1
+
+; HSA-CI-NOT: .amdhsa_reserve_xnack_mask
+; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
+; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
; CI: ; NumSgprs: 12
; VI-NOXNACK: ; NumSgprs: 14
; VI-XNACK: ; NumSgprs: 14
-; HSA-CI: ; NumSgprs: 12
-; HSA-VI-NOXNACK: ; NumSgprs: 14
-; HSA-VI-XNACK: ; NumSgprs: 14
define amdgpu_kernel void @vcc_flat() {
entry:
call void asm sideeffect "", "~{s7},~{vcc},~{flat_scratch}"()
; scratch usage and implicit flat uses.
; GCN-LABEL: {{^}}use_flat_scr:
+
+; HSA-CI-V2: is_xnack_enabled = 0
+; HSA-VI-XNACK-V2: is_xnack_enabled = 1
+
+; HSA-CI-NOT: .amdhsa_reserve_xnack_mask
+; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
+; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
+
; CI: NumSgprs: 4
; VI-NOXNACK: NumSgprs: 6
; VI-XNACK: NumSgprs: 6
}
; GCN-LABEL: {{^}}use_flat_scr_lo:
+
+; HSA-CI-V2: is_xnack_enabled = 0
+; HSA-VI-XNACK-V2: is_xnack_enabled = 1
+
+; HSA-CI-NOT: .amdhsa_reserve_xnack_mask
+; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
+; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
+
; CI: NumSgprs: 4
; VI-NOXNACK: NumSgprs: 6
; VI-XNACK: NumSgprs: 6
}
; GCN-LABEL: {{^}}use_flat_scr_hi:
+
+; HSA-CI-V2: is_xnack_enabled = 0
+; HSA-VI-XNACK-V2: is_xnack_enabled = 1
+
+; HSA-CI-NOT: .amdhsa_reserve_xnack_mask
+; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
+; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
+
; CI: NumSgprs: 4
; VI-NOXNACK: NumSgprs: 6
; VI-XNACK: NumSgprs: 6
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK: ---
; CHECK: amdhsa.kernels:
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
%struct.A = type { i8, float }
%opencl.image1d_t = type opaque
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; CHECK: ---
; CHECK: amdhsa.kernels:
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK: ---
; CHECK: amdhsa.kernels:
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK: ---
; CHECK: amdhsa.kernels:
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
%opencl.image1d_t = type opaque
%opencl.image1d_array_t = type opaque
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; Make sure llc does not crash for invalid opencl version metadata.
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; Make sure llc does not crash for invalid opencl version metadata.
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; Make sure llc does not crash for invalid opencl version metadata.
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-32 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-64 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-32 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-64 %s
-; GCN: ---
-; GCN: Kernels:
-; GCN: - Name: wavefrontsize
-; GCN: CodeProps:
-; GFX10-32: WavefrontSize: 32
-; GFX10-64: WavefrontSize: 64
-; GCN: ...
+; GCN: amdhsa.kernels:
+; GCN: .name: wavefrontsize
+; GFX10-32: .wavefront_size: 32
+; GFX10-64: .wavefront_size: 64
define amdgpu_kernel void @wavefrontsize() {
entry:
ret void
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx600 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI600 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx601 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI601 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI702 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI704 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI704 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX904 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX906 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx908 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX908 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx909 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX909 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1010 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1011 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1011 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1012 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1012 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1030 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1030 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1031 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1031 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1032 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1032 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1033 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1033 %s
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx600 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI600 %s
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx601 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI601 %s
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx602 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI602 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI700 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI700 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI701 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI701 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI702 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI704 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI704 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx705 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI705 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI801 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI801 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI802 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=iceland --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI803 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx805 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI805 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tongapro --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI805 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI810 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=stoney --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI810 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX900 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX901 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX902 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX903 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX904 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX905 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX906 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX907 %s
; HSA: .hsa_code_object_version 2,1
-; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU"
-; HSA-SI601: .hsa_code_object_isa 6,0,1,"AMD","AMDGPU"
+; NONHSA-SI600: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx600"
+; NONHSA-SI601: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx601"
+; NONHSA-SI602: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx602"
; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-CI701: .hsa_code_object_isa 7,0,1,"AMD","AMDGPU"
; HSA-CI702: .hsa_code_object_isa 7,0,2,"AMD","AMDGPU"
; HSA-CI703: .hsa_code_object_isa 7,0,3,"AMD","AMDGPU"
; HSA-CI704: .hsa_code_object_isa 7,0,4,"AMD","AMDGPU"
+; HSA-CI705: .hsa_code_object_isa 7,0,5,"AMD","AMDGPU"
; HSA-VI801: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
; HSA-VI802: .hsa_code_object_isa 8,0,2,"AMD","AMDGPU"
; HSA-VI803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
+; HSA-VI805: .hsa_code_object_isa 8,0,5,"AMD","AMDGPU"
; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU"
; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
+; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU"
; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
+; HSA-GFX903: .hsa_code_object_isa 9,0,3,"AMD","AMDGPU"
; HSA-GFX904: .hsa_code_object_isa 9,0,4,"AMD","AMDGPU"
+; HSA-GFX905: .hsa_code_object_isa 9,0,5,"AMD","AMDGPU"
; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU"
-; HSA-GFX908: .hsa_code_object_isa 9,0,8,"AMD","AMDGPU"
-; HSA-GFX909: .hsa_code_object_isa 9,0,9,"AMD","AMDGPU"
-; HSA-GFX1010: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"
-; HSA-GFX1011: .hsa_code_object_isa 10,1,1,"AMD","AMDGPU"
-; HSA-GFX1012: .hsa_code_object_isa 10,1,2,"AMD","AMDGPU"
-; HSA-GFX1030: .hsa_code_object_isa 10,3,0,"AMD","AMDGPU"
-; HSA-GFX1031: .hsa_code_object_isa 10,3,1,"AMD","AMDGPU"
-; HSA-GFX1032: .hsa_code_object_isa 10,3,2,"AMD","AMDGPU"
-; HSA-GFX1033: .hsa_code_object_isa 10,3,3,"AMD","AMDGPU"
+; HSA-GFX907: .hsa_code_object_isa 9,0,7,"AMD","AMDGPU"
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj --amdhsa-code-object-version=2 | llvm-readobj -symbols -s -sd - | FileCheck --check-prefix=ELF %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -symbols -s -sd - | FileCheck %s --check-prefix=ELF
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=HSA --check-prefix=GFX10 --check-prefix=GFX10-W32 %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=HSA --check-prefix=GFX10 --check-prefix=GFX10-W64 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s
; The SHT_NOTE section contains the output from the .hsa_code_object_*
; directives.
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; PRE-GFX10: enable_wavefront_size32 = 0
-; GFX10-W32: enable_wavefront_size32 = 1
-; GFX10-W64: enable_wavefront_size32 = 0
+; GFX10-W32: .amdhsa_wavefront_size32 1
+; GFX10-W64: .amdhsa_wavefront_size32 0
; PRE-GFX10: wavefront_size = 6
-; GFX10-W32: wavefront_size = 5
-; GFX10-W64: wavefront_size = 6
; HSA: call_convention = -1
; HSA: .end_amd_kernel_code_t
; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000
; Make sure we generate flat store for HSA
; PRE-GFX10: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
-; GFX10: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
+; GFX10: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off
; HSA: .Lfunc_end0:
; HSA: .size simple, .Lfunc_end0-simple
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=HSA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=HSA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=HSA %s
+
+declare void @llvm.trap() #0
+declare void @llvm.debugtrap() #1
+
+; HSA: .amdhsa_kernel trap
+; HSA-NEXT: .amdhsa_group_segment_fixed_size 0
+; HSA-NEXT: .amdhsa_private_segment_fixed_size 0
+; HSA-NEXT: .amdhsa_kernarg_size 8
+; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
+; HSA: .end_amdhsa_kernel
+
+define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
+ store volatile i32 1, i32 addrspace(1)* %arg0
+ call void @llvm.trap()
+ unreachable
+ store volatile i32 2, i32 addrspace(1)* %arg0
+ ret void
+}
; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck --check-prefixes=GCN,CI,ALL %s
; RUN: llc -march=amdgcn -mcpu=carrizo --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,VI,ALL %s
; RUN: llc -march=amdgcn -mcpu=gfx900 --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,GFX9,ALL %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 < %s -mattr=-flat-for-global | FileCheck --check-prefixes=GCNHSA,ALL %s
-; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,GFX10HSA,ALL %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 < %s -mattr=-flat-for-global | FileCheck --check-prefixes=GCNHSA,ALL %s
+; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,GFX10HSA,ALL %s
; FIXME: align on alloca seems to be ignored for private_segment_alignment
; GFX9-DAG: s_mov_b32 s{{[0-9]+}}, 0xe00000
-; GCNHSA: .amd_kernel_code_t
-
-; GCNHSA: enable_sgpr_private_segment_wave_byte_offset = 1
-; GCNHSA: user_sgpr_count = 8
-; GCNHSA: enable_sgpr_workgroup_id_x = 1
-; GCNHSA: enable_sgpr_workgroup_id_y = 0
-; GCNHSA: enable_sgpr_workgroup_id_z = 0
-; GCNHSA: enable_sgpr_workgroup_info = 0
-; GCNHSA: enable_vgpr_workitem_id = 0
-
-; GCNHSA: enable_sgpr_private_segment_buffer = 1
-; GCNHSA: enable_sgpr_dispatch_ptr = 0
-; GCNHSA: enable_sgpr_queue_ptr = 0
-; GCNHSA: enable_sgpr_kernarg_segment_ptr = 1
-; GCNHSA: enable_sgpr_dispatch_id = 0
-; GCNHSA: enable_sgpr_flat_scratch_init = 1
-; GCNHSA: enable_sgpr_private_segment_size = 0
-; GCNHSA: enable_sgpr_grid_workgroup_count_x = 0
-; GCNHSA: enable_sgpr_grid_workgroup_count_y = 0
-; GCNHSA: enable_sgpr_grid_workgroup_count_z = 0
-; GCNHSA: workitem_private_segment_byte_size = 32772
-; GCNHSA: private_segment_alignment = 4
-; GCNHSA: .end_amd_kernel_code_t
-
; GFX10HSA: s_add_u32 [[FLAT_SCR_LO:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
; GFX10HSA-DAG: s_addc_u32 [[FLAT_SCR_HI:s[0-9]+]], s{{[0-9]+}}, 0
; GFX10HSA-DAG: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), [[FLAT_SCR_LO]]
; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], 0 offen
; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], 0 offen
+; GCNHSA: .amdhsa_kernel large_alloca_compute_shader
+; GCNHSA: .amdhsa_group_segment_fixed_size 0
+; GCNHSA: .amdhsa_private_segment_fixed_size 32772
+; GCNHSA: .amdhsa_user_sgpr_private_segment_buffer 1
+; GCNHSA: .amdhsa_user_sgpr_dispatch_ptr 0
+; GCNHSA: .amdhsa_user_sgpr_queue_ptr 0
+; GCNHSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
+; GCNHSA: .amdhsa_user_sgpr_dispatch_id 0
+; GCNHSA: .amdhsa_user_sgpr_flat_scratch_init 1
+; GCNHSA: .amdhsa_user_sgpr_private_segment_size 0
+; GCNHSA: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; GCNHSA: .amdhsa_system_sgpr_workgroup_id_x 1
+; GCNHSA: .amdhsa_system_sgpr_workgroup_id_y 0
+; GCNHSA: .amdhsa_system_sgpr_workgroup_id_z 0
+; GCNHSA: .amdhsa_system_sgpr_workgroup_info 0
+; GCNHSA: .amdhsa_system_vgpr_workitem_id 0
+; GCNHSA: .amdhsa_next_free_vgpr 3
+; GCNHSA: .amdhsa_next_free_sgpr 10
+; GCNHSA: .amdhsa_float_round_mode_32 0
+; GCNHSA: .amdhsa_float_round_mode_16_64 0
+; GCNHSA: .amdhsa_float_denorm_mode_32 3
+; GCNHSA: .amdhsa_float_denorm_mode_16_64 3
+; GCNHSA: .amdhsa_dx10_clamp 1
+; GCNHSA: .amdhsa_ieee_mode 1
+; GCNHSA: .amdhsa_exception_fp_ieee_invalid_op 0
+; GCNHSA: .amdhsa_exception_fp_denorm_src 0
+; GCNHSA: .amdhsa_exception_fp_ieee_div_zero 0
+; GCNHSA: .amdhsa_exception_fp_ieee_overflow 0
+; GCNHSA: .amdhsa_exception_fp_ieee_underflow 0
+; GCNHSA: .amdhsa_exception_fp_ieee_inexact 0
+; GCNHSA: .amdhsa_exception_int_div_zero 0
+; GCNHSA: .end_amdhsa_kernel
+
; Scratch size = alloca size + emergency stack slot
; ALL: ; ScratchSize: 32772
define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) #0 {
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - -amdgpu-disable-lower-module-lds=true %s 2> %t | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - -amdgpu-disable-lower-module-lds=true %s 2> %t | FileCheck -check-prefixes=GFX8 %s
; RUN: FileCheck -check-prefix=ERR %s < %t
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-disable-lower-module-lds=true %s 2> %t | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-disable-lower-module-lds=true %s 2> %t | FileCheck -check-prefixes=GFX9 %s
; RUN: FileCheck -check-prefix=ERR %s < %t
@lds = internal addrspace(3) global float undef, align 4
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: ds_write_b32 v0, v0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX9-NEXT: s_trap 2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
define void @func_use_lds_global_constexpr_cast() {
-; GCN-LABEL: func_use_lds_global_constexpr_cast:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b64 s[0:1], s[6:7]
-; GCN-NEXT: s_trap 2
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX8-LABEL: func_use_lds_global_constexpr_cast:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b64 s[0:1], s[6:7]
+; GFX8-NEXT: s_trap 2
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_use_lds_global_constexpr_cast:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_trap 2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4
ret void
}
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=VI %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s
define amdgpu_kernel void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
; GFX9-LABEL: s_lshr_v2i16:
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; TODO: Some of those tests fail with OS == amdhsa due to unreasonable register
; allocation differences.
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}s_mulk_i32_k0:
; SI: s_load_dword [[VAL:s[0-9]+]]
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
+; RUN: llc -march=amdgcn -mcpu=gfx902 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx904 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; Make sure the correct set of targets is marked with
-; FeatureDoesNotSupportSRAMECC, and +sram-ecc is ignored if it's never
+; FeatureDoesNotSupportSRAMECC, and +sramecc is ignored if it's never
; supported.
; GCN-LABEL: {{^}}load_global_hi_v2i16_reglo_vreg:
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=VI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=GFX9 %s
; Make sure the stack is never realigned for entry functions.
; VI-NEXT: .amdhsa_kernel max_alignment_128
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 256
+; VI-NEXT: .amdhsa_kernarg_size 0
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT: .amdhsa_kernel max_alignment_128
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256
+; GFX9-NEXT: .amdhsa_kernarg_size 0
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
+; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
; VI-NEXT: .amdhsa_kernel stackrealign_attr
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 8
+; VI-NEXT: .amdhsa_kernarg_size 0
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT: .amdhsa_kernel stackrealign_attr
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 8
+; GFX9-NEXT: .amdhsa_kernarg_size 0
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
+; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
; VI-NEXT: .amdhsa_kernel alignstack_attr
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 128
+; VI-NEXT: .amdhsa_kernarg_size 0
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT: .amdhsa_kernel alignstack_attr
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 128
+; GFX9-NEXT: .amdhsa_kernarg_size 0
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
+; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
+
+; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
+; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900
+; ASM: amdhsa.version:
+; ASM: - 1
+; ASM: - 1
+
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: ABIVersion: 2
+; ELF: Flags [ (0x12C)
+; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100)
+; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
+; ELF-NEXT: ]
+
+define void @func0() {
+entry:
+ ret void
+}
+
+define void @func1() {
+entry:
+ ret void
+}
+
+define void @func2() {
+entry:
+ ret void
+}
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
+
+; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
+; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700
+; ASM: amdhsa.version:
+; ASM: - 1
+; ASM: - 1
+
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: ABIVersion: 2
+; ELF: Flags [ (0x22)
+; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22)
+; ELF-NEXT: ]
+
+define void @func0() {
+entry:
+ ret void
+}
+
+define void @func1() {
+entry:
+ ret void
+}
+
+define void @func2() {
+entry:
+ ret void
+}
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
+
+; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
+; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
+; ASM: amdhsa.version:
+; ASM: - 1
+; ASM: - 1
+
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: ABIVersion: 2
+; ELF: Flags [ (0x22C)
+; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
+; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
+; ELF-NEXT: ]
+
+define void @func0() #0 {
+entry:
+ ret void
+}
+
+define void @func1() #0 {
+entry:
+ ret void
+}
+
+define void @func2() #0 {
+entry:
+ ret void
+}
+
+attributes #0 = { "target-features"="-xnack" }
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
+
+; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
+; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
+; ASM: amdhsa.version:
+; ASM: - 1
+; ASM: - 1
+
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: ABIVersion: 2
+; ELF: Flags [ (0x32C)
+; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
+; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
+; ELF-NEXT: ]
+
+define void @func0() #0 {
+entry:
+ ret void
+}
+
+define void @func1() #0 {
+entry:
+ ret void
+}
+
+define void @func2() #0 {
+entry:
+ ret void
+}
+
+attributes #0 = { "target-features"="+xnack" }
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
+
+; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
+; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
+; ASM: amdhsa.version:
+; ASM: - 1
+; ASM: - 1
+
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: ABIVersion: 2
+; ELF: Flags [ (0x22C)
+; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
+; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
+; ELF-NEXT: ]
+
+define void @func0() {
+entry:
+ ret void
+}
+
+define void @func1() #0 {
+entry:
+ ret void
+}
+
+define void @func2() {
+entry:
+ ret void
+}
+
+attributes #0 = { "target-features"="-xnack" }
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
+
+; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
+; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
+; ASM: amdhsa.version:
+; ASM: - 1
+; ASM: - 1
+
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: ABIVersion: 2
+; ELF: Flags [ (0x22C)
+; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
+; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
+; ELF-NEXT: ]
+
+define void @func0() #0 {
+entry:
+ ret void
+}
+
+define void @func1() {
+entry:
+ ret void
+}
+
+define void @func2() {
+entry:
+ ret void
+}
+
+attributes #0 = { "target-features"="-xnack" }
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
+
+; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
+; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
+; ASM: amdhsa.version:
+; ASM: - 1
+; ASM: - 1
+
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: ABIVersion: 2
+; ELF: Flags [ (0x32C)
+; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
+; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
+; ELF-NEXT: ]
+
+define void @func0() {
+entry:
+ ret void
+}
+
+define void @func1() #0 {
+entry:
+ ret void
+}
+
+define void @func2() {
+entry:
+ ret void
+}
+
+attributes #0 = { "target-features"="+xnack" }
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
+
+; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
+; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
+; ASM: amdhsa.version:
+; ASM: - 1
+; ASM: - 1
+
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: ABIVersion: 2
+; ELF: Flags [ (0x32C)
+; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
+; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
+; ELF-NEXT: ]
+
+define void @func0() #0 {
+entry:
+ ret void
+}
+
+define void @func1() {
+entry:
+ ret void
+}
+
+define void @func2() {
+entry:
+ ret void
+}
+
+attributes #0 = { "target-features"="+xnack" }
--- /dev/null
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s 2>&1 | FileCheck --check-prefixes=ERR %s
+
+; ERR: error: xnack setting of 'func2' function does not match module xnack setting
+
+define void @func0() {
+entry:
+ ret void
+}
+
+define void @func1() #0 {
+entry:
+ ret void
+}
+
+define void @func2() #1 {
+entry:
+ ret void
+}
+
+attributes #0 = { "target-features"="-xnack" }
+attributes #1 = { "target-features"="+xnack" }
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
+
+; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
+; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900
+; ASM: amdhsa.version:
+; ASM: - 1
+; ASM: - 1
+
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: ABIVersion: 2
+; ELF: Flags [ (0x12C)
+; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100)
+; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
+; ELF-NEXT: ]
+
+define void @func0() {
+entry:
+ ret void
+}
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
+
+; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
+; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700
+; ASM: amdhsa.version:
+; ASM: - 1
+; ASM: - 1
+
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: ABIVersion: 2
+; ELF: Flags [ (0x22)
+; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22)
+; ELF-NEXT: ]
+
+define void @func0() {
+entry:
+ ret void
+}
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
+
+; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
+; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
+; ASM: amdhsa.version:
+; ASM: - 1
+; ASM: - 1
+
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: ABIVersion: 2
+; ELF: Flags [ (0x22C)
+; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
+; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
+; ELF-NEXT: ]
+
+define void @func0() #0 {
+entry:
+ ret void
+}
+
+attributes #0 = { "target-features"="-xnack" }
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
+
+; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
+; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
+; ASM: amdhsa.version:
+; ASM: - 1
+; ASM: - 1
+
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: ABIVersion: 2
+; ELF: Flags [ (0x32C)
+; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
+; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
+; ELF-NEXT: ]
+
+define void @func0() #0 {
+entry:
+ ret void
+}
+
+attributes #0 = { "target-features"="+xnack" }
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V2 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V3 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V4 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX803-V2 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX803-V3 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX803-V4 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX900-V2 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX900-V3 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX900-V4 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V2 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V3 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V4 %s
+
+declare void @llvm.trap() #0
+declare void @llvm.debugtrap() #1
+
+define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
+; NOHSA-TRAP-GFX900-V2-LABEL: trap:
+; NOHSA-TRAP-GFX900-V2: ; %bb.0:
+; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
+; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm
+;
+; NOHSA-TRAP-GFX900-V3-LABEL: trap:
+; NOHSA-TRAP-GFX900-V3: ; %bb.0:
+; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
+; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm
+;
+; NOHSA-TRAP-GFX900-V4-LABEL: trap:
+; NOHSA-TRAP-GFX900-V4: ; %bb.0:
+; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
+; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm
+;
+; HSA-TRAP-GFX803-V2-LABEL: trap:
+; HSA-TRAP-GFX803-V2: .amd_kernel_code_t
+; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_major = 1
+; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_minor = 2
+; HSA-TRAP-GFX803-V2-NEXT: amd_machine_kind = 1
+; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_major = 8
+; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_minor = 0
+; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_stepping = 3
+; HSA-TRAP-GFX803-V2-NEXT: kernel_code_entry_byte_offset = 256
+; HSA-TRAP-GFX803-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: granulated_workitem_vgpr_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: priority = 0
+; HSA-TRAP-GFX803-V2-NEXT: float_mode = 240
+; HSA-TRAP-GFX803-V2-NEXT: priv = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_dx10_clamp = 1
+; HSA-TRAP-GFX803-V2-NEXT: debug_mode = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_ieee_mode = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_wgp_mode = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_mem_ordered = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_fwd_progress = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; HSA-TRAP-GFX803-V2-NEXT: user_sgpr_count = 8
+; HSA-TRAP-GFX803-V2-NEXT: enable_trap_handler = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_info = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_vgpr_workitem_id = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_exception_msb = 0
+; HSA-TRAP-GFX803-V2-NEXT: granulated_lds_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_exception = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_buffer = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_queue_ptr = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_id = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_wavefront_size32 = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_ordered_append_gds = 0
+; HSA-TRAP-GFX803-V2-NEXT: private_element_size = 1
+; HSA-TRAP-GFX803-V2-NEXT: is_ptr64 = 1
+; HSA-TRAP-GFX803-V2-NEXT: is_dynamic_callstack = 0
+; HSA-TRAP-GFX803-V2-NEXT: is_debug_enabled = 0
+; HSA-TRAP-GFX803-V2-NEXT: is_xnack_enabled = 0
+; HSA-TRAP-GFX803-V2-NEXT: workitem_private_segment_byte_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: workgroup_group_segment_byte_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: gds_segment_byte_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_byte_size = 8
+; HSA-TRAP-GFX803-V2-NEXT: workgroup_fbarrier_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: wavefront_sgpr_count = 8
+; HSA-TRAP-GFX803-V2-NEXT: workitem_vgpr_count = 3
+; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_first = 0
+; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_first = 0
+; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; HSA-TRAP-GFX803-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_alignment = 4
+; HSA-TRAP-GFX803-V2-NEXT: group_segment_alignment = 4
+; HSA-TRAP-GFX803-V2-NEXT: private_segment_alignment = 4
+; HSA-TRAP-GFX803-V2-NEXT: wavefront_size = 6
+; HSA-TRAP-GFX803-V2-NEXT: call_convention = -1
+; HSA-TRAP-GFX803-V2-NEXT: runtime_loader_kernel_symbol = 0
+; HSA-TRAP-GFX803-V2-NEXT: .end_amd_kernel_code_t
+; HSA-TRAP-GFX803-V2-NEXT: ; %bb.0:
+; HSA-TRAP-GFX803-V2-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
+; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-GFX803-V2-NEXT: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s2
+; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s3
+; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V2-NEXT: s_trap 2
+;
+; HSA-TRAP-GFX803-V3-LABEL: trap:
+; HSA-TRAP-GFX803-V3: ; %bb.0:
+; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
+; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-GFX803-V3-NEXT: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s2
+; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s3
+; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V3-NEXT: s_trap 2
+;
+; HSA-TRAP-GFX803-V4-LABEL: trap:
+; HSA-TRAP-GFX803-V4: ; %bb.0:
+; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
+; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-GFX803-V4-NEXT: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s2
+; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s3
+; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V4-NEXT: s_trap 2
+;
+; HSA-TRAP-GFX900-V2-LABEL: trap:
+; HSA-TRAP-GFX900-V2: .amd_kernel_code_t
+; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1
+; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2
+; HSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1
+; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9
+; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0
+; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0
+; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
+; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: priority = 0
+; HSA-TRAP-GFX900-V2-NEXT: float_mode = 240
+; HSA-TRAP-GFX900-V2-NEXT: priv = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
+; HSA-TRAP-GFX900-V2-NEXT: debug_mode = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; HSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 8
+; HSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_exception = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
+; HSA-TRAP-GFX900-V2-NEXT: private_element_size = 1
+; HSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1
+; HSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
+; HSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0
+; HSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
+; HSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
+; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8
+; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
+; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
+; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
+; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; HSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
+; HSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4
+; HSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4
+; HSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6
+; HSA-TRAP-GFX900-V2-NEXT: call_convention = -1
+; HSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
+; HSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
+; HSA-TRAP-GFX900-V2-NEXT: ; %bb.0:
+; HSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
+; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
+; HSA-TRAP-GFX900-V2-NEXT: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[2:3]
+; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V2-NEXT: s_trap 2
+;
+; HSA-TRAP-GFX900-V3-LABEL: trap:
+; HSA-TRAP-GFX900-V3: ; %bb.0:
+; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
+; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
+; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[2:3]
+; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V3-NEXT: s_trap 2
+;
+; HSA-TRAP-GFX900-V4-LABEL: trap:
+; HSA-TRAP-GFX900-V4: ; %bb.0:
+; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
+; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V4-NEXT: s_trap 2
+;
+; HSA-NOTRAP-GFX900-V2-LABEL: trap:
+; HSA-NOTRAP-GFX900-V2: .amd_kernel_code_t
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_major = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_minor = 2
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_kind = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_major = 9
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
+; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240
+; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: debug_mode = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: user_sgpr_count = 8
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_trap_handler = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception_msb = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_lds_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: private_element_size = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: is_ptr64 = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: is_debug_enabled = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
+; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8
+; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
+; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
+; HSA-NOTRAP-GFX900-V2-NEXT: group_segment_alignment = 4
+; HSA-NOTRAP-GFX900-V2-NEXT: private_segment_alignment = 4
+; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_size = 6
+; HSA-NOTRAP-GFX900-V2-NEXT: call_convention = -1
+; HSA-NOTRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
+; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.0:
+; HSA-NOTRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
+; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm
+;
+; HSA-NOTRAP-GFX900-V3-LABEL: trap:
+; HSA-NOTRAP-GFX900-V3: ; %bb.0:
+; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
+; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm
+;
+; HSA-NOTRAP-GFX900-V4-LABEL: trap:
+; HSA-NOTRAP-GFX900-V4: ; %bb.0:
+; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
+; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm
+ store volatile i32 1, i32 addrspace(1)* %arg0
+ call void @llvm.trap()
+ unreachable
+ store volatile i32 2, i32 addrspace(1)* %arg0
+ ret void
+}
+
+define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr {
+; NOHSA-TRAP-GFX900-V2-LABEL: non_entry_trap:
+; NOHSA-TRAP-GFX900-V2: ; %bb.0: ; %entry
+; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc
+; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
+; NOHSA-TRAP-GFX900-V2-NEXT: s_and_b64 vcc, exec, vcc
+; NOHSA-TRAP-GFX900-V2-NEXT: s_cbranch_vccz BB1_2
+; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret
+; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3
+; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm
+; NOHSA-TRAP-GFX900-V2-NEXT: BB1_2: ; %trap
+; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm
+;
+; NOHSA-TRAP-GFX900-V3-LABEL: non_entry_trap:
+; NOHSA-TRAP-GFX900-V3: ; %bb.0: ; %entry
+; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc
+; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
+; NOHSA-TRAP-GFX900-V3-NEXT: s_and_b64 vcc, exec, vcc
+; NOHSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz BB1_2
+; NOHSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret
+; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3
+; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm
+; NOHSA-TRAP-GFX900-V3-NEXT: BB1_2: ; %trap
+; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm
+;
+; NOHSA-TRAP-GFX900-V4-LABEL: non_entry_trap:
+; NOHSA-TRAP-GFX900-V4: ; %bb.0: ; %entry
+; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc
+; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
+; NOHSA-TRAP-GFX900-V4-NEXT: s_and_b64 vcc, exec, vcc
+; NOHSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz BB1_2
+; NOHSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret
+; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3
+; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm
+; NOHSA-TRAP-GFX900-V4-NEXT: BB1_2: ; %trap
+; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm
+;
+; HSA-TRAP-GFX803-V2-LABEL: non_entry_trap:
+; HSA-TRAP-GFX803-V2: .amd_kernel_code_t
+; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_major = 1
+; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_minor = 2
+; HSA-TRAP-GFX803-V2-NEXT: amd_machine_kind = 1
+; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_major = 8
+; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_minor = 0
+; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_stepping = 3
+; HSA-TRAP-GFX803-V2-NEXT: kernel_code_entry_byte_offset = 256
+; HSA-TRAP-GFX803-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: granulated_workitem_vgpr_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: granulated_wavefront_sgpr_count = 1
+; HSA-TRAP-GFX803-V2-NEXT: priority = 0
+; HSA-TRAP-GFX803-V2-NEXT: float_mode = 240
+; HSA-TRAP-GFX803-V2-NEXT: priv = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_dx10_clamp = 1
+; HSA-TRAP-GFX803-V2-NEXT: debug_mode = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_ieee_mode = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_wgp_mode = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_mem_ordered = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_fwd_progress = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; HSA-TRAP-GFX803-V2-NEXT: user_sgpr_count = 8
+; HSA-TRAP-GFX803-V2-NEXT: enable_trap_handler = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_info = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_vgpr_workitem_id = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_exception_msb = 0
+; HSA-TRAP-GFX803-V2-NEXT: granulated_lds_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_exception = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_buffer = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_queue_ptr = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_id = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_wavefront_size32 = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_ordered_append_gds = 0
+; HSA-TRAP-GFX803-V2-NEXT: private_element_size = 1
+; HSA-TRAP-GFX803-V2-NEXT: is_ptr64 = 1
+; HSA-TRAP-GFX803-V2-NEXT: is_dynamic_callstack = 0
+; HSA-TRAP-GFX803-V2-NEXT: is_debug_enabled = 0
+; HSA-TRAP-GFX803-V2-NEXT: is_xnack_enabled = 0
+; HSA-TRAP-GFX803-V2-NEXT: workitem_private_segment_byte_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: workgroup_group_segment_byte_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: gds_segment_byte_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_byte_size = 8
+; HSA-TRAP-GFX803-V2-NEXT: workgroup_fbarrier_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: wavefront_sgpr_count = 10
+; HSA-TRAP-GFX803-V2-NEXT: workitem_vgpr_count = 3
+; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_first = 0
+; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_first = 0
+; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; HSA-TRAP-GFX803-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_alignment = 4
+; HSA-TRAP-GFX803-V2-NEXT: group_segment_alignment = 4
+; HSA-TRAP-GFX803-V2-NEXT: private_segment_alignment = 4
+; HSA-TRAP-GFX803-V2-NEXT: wavefront_size = 6
+; HSA-TRAP-GFX803-V2-NEXT: call_convention = -1
+; HSA-TRAP-GFX803-V2-NEXT: runtime_loader_kernel_symbol = 0
+; HSA-TRAP-GFX803-V2-NEXT: .end_amd_kernel_code_t
+; HSA-TRAP-GFX803-V2-NEXT: ; %bb.0: ; %entry
+; HSA-TRAP-GFX803-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-V2-NEXT: flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-TRAP-GFX803-V2-NEXT: s_and_b64 vcc, exec, vcc
+; HSA-TRAP-GFX803-V2-NEXT: s_cbranch_vccz BB1_2
+; HSA-TRAP-GFX803-V2-NEXT: ; %bb.1: ; %ret
+; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v2, 3
+; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V2-NEXT: s_endpgm
+; HSA-TRAP-GFX803-V2-NEXT: BB1_2: ; %trap
+; HSA-TRAP-GFX803-V2-NEXT: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX803-V2-NEXT: s_trap 2
+;
+; HSA-TRAP-GFX803-V3-LABEL: non_entry_trap:
+; HSA-TRAP-GFX803-V3: ; %bb.0: ; %entry
+; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-V3-NEXT: flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-TRAP-GFX803-V3-NEXT: s_and_b64 vcc, exec, vcc
+; HSA-TRAP-GFX803-V3-NEXT: s_cbranch_vccz BB1_2
+; HSA-TRAP-GFX803-V3-NEXT: ; %bb.1: ; %ret
+; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 3
+; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V3-NEXT: s_endpgm
+; HSA-TRAP-GFX803-V3-NEXT: BB1_2: ; %trap
+; HSA-TRAP-GFX803-V3-NEXT: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX803-V3-NEXT: s_trap 2
+;
+; HSA-TRAP-GFX803-V4-LABEL: non_entry_trap:
+; HSA-TRAP-GFX803-V4: ; %bb.0: ; %entry
+; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-V4-NEXT: flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-TRAP-GFX803-V4-NEXT: s_and_b64 vcc, exec, vcc
+; HSA-TRAP-GFX803-V4-NEXT: s_cbranch_vccz BB1_2
+; HSA-TRAP-GFX803-V4-NEXT: ; %bb.1: ; %ret
+; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 3
+; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V4-NEXT: s_endpgm
+; HSA-TRAP-GFX803-V4-NEXT: BB1_2: ; %trap
+; HSA-TRAP-GFX803-V4-NEXT: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX803-V4-NEXT: s_trap 2
+;
+; HSA-TRAP-GFX900-V2-LABEL: non_entry_trap:
+; HSA-TRAP-GFX900-V2: .amd_kernel_code_t
+; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1
+; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2
+; HSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1
+; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9
+; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0
+; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0
+; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
+; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1
+; HSA-TRAP-GFX900-V2-NEXT: priority = 0
+; HSA-TRAP-GFX900-V2-NEXT: float_mode = 240
+; HSA-TRAP-GFX900-V2-NEXT: priv = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
+; HSA-TRAP-GFX900-V2-NEXT: debug_mode = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; HSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 8
+; HSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_exception = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
+; HSA-TRAP-GFX900-V2-NEXT: private_element_size = 1
+; HSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1
+; HSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
+; HSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0
+; HSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
+; HSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
+; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10
+; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
+; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
+; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
+; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; HSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
+; HSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4
+; HSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4
+; HSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6
+; HSA-TRAP-GFX900-V2-NEXT: call_convention = -1
+; HSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
+; HSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
+; HSA-TRAP-GFX900-V2-NEXT: ; %bb.0: ; %entry
+; HSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc
+; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
+; HSA-TRAP-GFX900-V2-NEXT: s_and_b64 vcc, exec, vcc
+; HSA-TRAP-GFX900-V2-NEXT: s_cbranch_vccz BB1_2
+; HSA-TRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret
+; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3
+; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V2-NEXT: s_endpgm
+; HSA-TRAP-GFX900-V2-NEXT: BB1_2: ; %trap
+; HSA-TRAP-GFX900-V2-NEXT: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX900-V2-NEXT: s_trap 2
+;
+; HSA-TRAP-GFX900-V3-LABEL: non_entry_trap:
+; HSA-TRAP-GFX900-V3: ; %bb.0: ; %entry
+; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc
+; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
+; HSA-TRAP-GFX900-V3-NEXT: s_and_b64 vcc, exec, vcc
+; HSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz BB1_2
+; HSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret
+; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3
+; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V3-NEXT: s_endpgm
+; HSA-TRAP-GFX900-V3-NEXT: BB1_2: ; %trap
+; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX900-V3-NEXT: s_trap 2
+;
+; HSA-TRAP-GFX900-V4-LABEL: non_entry_trap:
+; HSA-TRAP-GFX900-V4: ; %bb.0: ; %entry
+; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc
+; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
+; HSA-TRAP-GFX900-V4-NEXT: s_and_b64 vcc, exec, vcc
+; HSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz BB1_2
+; HSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret
+; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3
+; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V4-NEXT: s_endpgm
+; HSA-TRAP-GFX900-V4-NEXT: BB1_2: ; %trap
+; HSA-TRAP-GFX900-V4-NEXT: s_trap 2
+;
+; HSA-NOTRAP-GFX900-V2-LABEL: non_entry_trap:
+; HSA-NOTRAP-GFX900-V2: .amd_kernel_code_t
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_major = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_minor = 2
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_kind = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_major = 9
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
+; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240
+; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: debug_mode = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: user_sgpr_count = 8
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_trap_handler = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception_msb = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_lds_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: private_element_size = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: is_ptr64 = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: is_debug_enabled = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
+; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10
+; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
+; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
+; HSA-NOTRAP-GFX900-V2-NEXT: group_segment_alignment = 4
+; HSA-NOTRAP-GFX900-V2-NEXT: private_segment_alignment = 4
+; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_size = 6
+; HSA-NOTRAP-GFX900-V2-NEXT: call_convention = -1
+; HSA-NOTRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
+; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.0: ; %entry
+; HSA-NOTRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc
+; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
+; HSA-NOTRAP-GFX900-V2-NEXT: s_and_b64 vcc, exec, vcc
+; HSA-NOTRAP-GFX900-V2-NEXT: s_cbranch_vccz BB1_2
+; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret
+; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3
+; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm
+; HSA-NOTRAP-GFX900-V2-NEXT: BB1_2: ; %trap
+; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm
+;
+; HSA-NOTRAP-GFX900-V3-LABEL: non_entry_trap:
+; HSA-NOTRAP-GFX900-V3: ; %bb.0: ; %entry
+; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc
+; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
+; HSA-NOTRAP-GFX900-V3-NEXT: s_and_b64 vcc, exec, vcc
+; HSA-NOTRAP-GFX900-V3-NEXT: s_cbranch_vccz BB1_2
+; HSA-NOTRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret
+; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3
+; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm
+; HSA-NOTRAP-GFX900-V3-NEXT: BB1_2: ; %trap
+; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm
+;
+; HSA-NOTRAP-GFX900-V4-LABEL: non_entry_trap:
+; HSA-NOTRAP-GFX900-V4: ; %bb.0: ; %entry
+; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc
+; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
+; HSA-NOTRAP-GFX900-V4-NEXT: s_and_b64 vcc, exec, vcc
+; HSA-NOTRAP-GFX900-V4-NEXT: s_cbranch_vccz BB1_2
+; HSA-NOTRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret
+; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3
+; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm
+; HSA-NOTRAP-GFX900-V4-NEXT: BB1_2: ; %trap
+; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm
+entry:
+ %tmp29 = load volatile i32, i32 addrspace(1)* %arg0
+ %cmp = icmp eq i32 %tmp29, -1
+ br i1 %cmp, label %ret, label %trap
+
+trap:
+ call void @llvm.trap()
+ unreachable
+
+ret:
+ store volatile i32 3, i32 addrspace(1)* %arg0
+ ret void
+}
+
+; debugtrap: kernel that stores 1 to %arg0, calls llvm.debugtrap, then stores 2
+; and returns. The expectations below differ per check prefix:
+;   * HSA-TRAP-* prefixes expect an `s_trap 3` between the two stores.
+;   * NOHSA-TRAP-* and HSA-NOTRAP-* prefixes expect the two stores with no
+;     s_trap between them.
+;   * The HSA *-V2 prefixes additionally check the .amd_kernel_code_t block
+;     emitted ahead of the instructions; the V3/V4 prefixes start at %bb.0.
+define amdgpu_kernel void @debugtrap(i32 addrspace(1)* nocapture readonly %arg0) {
+; NOHSA-TRAP-GFX900-V2-LABEL: debugtrap:
+; NOHSA-TRAP-GFX900-V2: ; %bb.0:
+; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
+; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2
+; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1]
+; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm
+;
+; NOHSA-TRAP-GFX900-V3-LABEL: debugtrap:
+; NOHSA-TRAP-GFX900-V3: ; %bb.0:
+; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
+; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2
+; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1]
+; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm
+;
+; NOHSA-TRAP-GFX900-V4-LABEL: debugtrap:
+; NOHSA-TRAP-GFX900-V4: ; %bb.0:
+; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
+; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2
+; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1]
+; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm
+;
+; HSA-TRAP-GFX803-V2-LABEL: debugtrap:
+; HSA-TRAP-GFX803-V2: .amd_kernel_code_t
+; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_major = 1
+; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_minor = 2
+; HSA-TRAP-GFX803-V2-NEXT: amd_machine_kind = 1
+; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_major = 8
+; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_minor = 0
+; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_stepping = 3
+; HSA-TRAP-GFX803-V2-NEXT: kernel_code_entry_byte_offset = 256
+; HSA-TRAP-GFX803-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: granulated_workitem_vgpr_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: priority = 0
+; HSA-TRAP-GFX803-V2-NEXT: float_mode = 240
+; HSA-TRAP-GFX803-V2-NEXT: priv = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_dx10_clamp = 1
+; HSA-TRAP-GFX803-V2-NEXT: debug_mode = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_ieee_mode = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_wgp_mode = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_mem_ordered = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_fwd_progress = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; HSA-TRAP-GFX803-V2-NEXT: user_sgpr_count = 8
+; HSA-TRAP-GFX803-V2-NEXT: enable_trap_handler = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_info = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_vgpr_workitem_id = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_exception_msb = 0
+; HSA-TRAP-GFX803-V2-NEXT: granulated_lds_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_exception = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_buffer = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_queue_ptr = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_id = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_wavefront_size32 = 0
+; HSA-TRAP-GFX803-V2-NEXT: enable_ordered_append_gds = 0
+; HSA-TRAP-GFX803-V2-NEXT: private_element_size = 1
+; HSA-TRAP-GFX803-V2-NEXT: is_ptr64 = 1
+; HSA-TRAP-GFX803-V2-NEXT: is_dynamic_callstack = 0
+; HSA-TRAP-GFX803-V2-NEXT: is_debug_enabled = 0
+; HSA-TRAP-GFX803-V2-NEXT: is_xnack_enabled = 0
+; HSA-TRAP-GFX803-V2-NEXT: workitem_private_segment_byte_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: workgroup_group_segment_byte_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: gds_segment_byte_size = 0
+; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_byte_size = 8
+; HSA-TRAP-GFX803-V2-NEXT: workgroup_fbarrier_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: wavefront_sgpr_count = 8
+; HSA-TRAP-GFX803-V2-NEXT: workitem_vgpr_count = 4
+; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_first = 0
+; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_first = 0
+; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_count = 0
+; HSA-TRAP-GFX803-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; HSA-TRAP-GFX803-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_alignment = 4
+; HSA-TRAP-GFX803-V2-NEXT: group_segment_alignment = 4
+; HSA-TRAP-GFX803-V2-NEXT: private_segment_alignment = 4
+; HSA-TRAP-GFX803-V2-NEXT: wavefront_size = 6
+; HSA-TRAP-GFX803-V2-NEXT: call_convention = -1
+; HSA-TRAP-GFX803-V2-NEXT: runtime_loader_kernel_symbol = 0
+; HSA-TRAP-GFX803-V2-NEXT: .end_amd_kernel_code_t
+; HSA-TRAP-GFX803-V2-NEXT: ; %bb.0:
+; HSA-TRAP-GFX803-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v3, 2
+; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V2-NEXT: s_trap 3
+; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v3
+; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V2-NEXT: s_endpgm
+;
+; HSA-TRAP-GFX803-V3-LABEL: debugtrap:
+; HSA-TRAP-GFX803-V3: ; %bb.0:
+; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v3, 2
+; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V3-NEXT: s_trap 3
+; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v3
+; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V3-NEXT: s_endpgm
+;
+; HSA-TRAP-GFX803-V4-LABEL: debugtrap:
+; HSA-TRAP-GFX803-V4: ; %bb.0:
+; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v3, 2
+; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V4-NEXT: s_trap 3
+; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v3
+; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-V4-NEXT: s_endpgm
+;
+; HSA-TRAP-GFX900-V2-LABEL: debugtrap:
+; HSA-TRAP-GFX900-V2: .amd_kernel_code_t
+; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1
+; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2
+; HSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1
+; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9
+; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0
+; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0
+; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
+; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: priority = 0
+; HSA-TRAP-GFX900-V2-NEXT: float_mode = 240
+; HSA-TRAP-GFX900-V2-NEXT: priv = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
+; HSA-TRAP-GFX900-V2-NEXT: debug_mode = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; HSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 8
+; HSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_exception = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
+; HSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
+; HSA-TRAP-GFX900-V2-NEXT: private_element_size = 1
+; HSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1
+; HSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
+; HSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0
+; HSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
+; HSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
+; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
+; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8
+; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3
+; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
+; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
+; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; HSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
+; HSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4
+; HSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4
+; HSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6
+; HSA-TRAP-GFX900-V2-NEXT: call_convention = -1
+; HSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
+; HSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
+; HSA-TRAP-GFX900-V2-NEXT: ; %bb.0:
+; HSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
+; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2
+; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V2-NEXT: s_trap 3
+; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1]
+; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V2-NEXT: s_endpgm
+;
+; HSA-TRAP-GFX900-V3-LABEL: debugtrap:
+; HSA-TRAP-GFX900-V3: ; %bb.0:
+; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
+; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2
+; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V3-NEXT: s_trap 3
+; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1]
+; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V3-NEXT: s_endpgm
+;
+; HSA-TRAP-GFX900-V4-LABEL: debugtrap:
+; HSA-TRAP-GFX900-V4: ; %bb.0:
+; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
+; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2
+; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V4-NEXT: s_trap 3
+; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1]
+; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-V4-NEXT: s_endpgm
+;
+; HSA-NOTRAP-GFX900-V2-LABEL: debugtrap:
+; HSA-NOTRAP-GFX900-V2: .amd_kernel_code_t
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_major = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_minor = 2
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_kind = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_major = 9
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
+; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240
+; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: debug_mode = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: user_sgpr_count = 8
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_trap_handler = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception_msb = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_lds_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: private_element_size = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: is_ptr64 = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: is_debug_enabled = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
+; HSA-NOTRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
+; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8
+; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3
+; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
+; HSA-NOTRAP-GFX900-V2-NEXT: group_segment_alignment = 4
+; HSA-NOTRAP-GFX900-V2-NEXT: private_segment_alignment = 4
+; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_size = 6
+; HSA-NOTRAP-GFX900-V2-NEXT: call_convention = -1
+; HSA-NOTRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
+; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.0:
+; HSA-NOTRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
+; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2
+; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1]
+; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm
+;
+; HSA-NOTRAP-GFX900-V3-LABEL: debugtrap:
+; HSA-NOTRAP-GFX900-V3: ; %bb.0:
+; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
+; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2
+; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1]
+; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm
+;
+; HSA-NOTRAP-GFX900-V4-LABEL: debugtrap:
+; HSA-NOTRAP-GFX900-V4: ; %bb.0:
+; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
+; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2
+; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1]
+; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm
+; IR under test: volatile store of 1, llvm.debugtrap, volatile store of 2.
+ store volatile i32 1, i32 addrspace(1)* %arg0
+ call void @llvm.debugtrap()
+ store volatile i32 2, i32 addrspace(1)* %arg0
+ ret void
+}
+
+attributes #0 = { nounwind noreturn }
+attributes #1 = { nounwind }
-// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,NONGFX10,AMDHSA
-// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10,AMDHSA
-// RUN: not llvm-mc -triple amdgcn-amd- -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA
+// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,NONGFX10,AMDHSA
+// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10,AMDHSA
+// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA
// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GFX90A,NONGFX10,AMDHSA,ALL
.text
// GCN-LABEL: warning: test_target
// GFX8-NOT: error:
-// GFX10: error: target must match options
-// NONAMDHSA: error: unknown directive
+// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-amdhsa--gfx1010+xnack
+// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-unknown--gfx810
.warning "test_target"
-.amdgcn_target "amdgcn-amd-amdhsa--gfx803+xnack"
+.amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack"
// GCN-LABEL: warning: test_amdhsa_kernel_no_name
// GCN: error: unknown directive
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -filetype=obj < %s > %t
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readobj -elf-output-style=GNU -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
// complete
-// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
+// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000
.amdhsa_kernel complete
.amdhsa_group_segment_fixed_size 1
.amdhsa_private_segment_fixed_size 1
+ .amdhsa_kernarg_size 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 1
.amdhsa_next_free_sgpr 27
.amdhsa_reserve_vcc 0
.amdhsa_reserve_flat_scratch 0
- .amdhsa_reserve_xnack_mask 0
+ .amdhsa_reserve_xnack_mask 1
.amdhsa_float_round_mode_32 1
.amdhsa_float_round_mode_16_64 1
.amdhsa_float_denorm_mode_32 1
// ASM: .amdhsa_kernel complete
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_kernarg_size 8
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
.amdhsa_reserve_flat_scratch 1
.amdhsa_reserve_vcc 0
- .amdhsa_reserve_xnack_mask 0
+ .amdhsa_reserve_xnack_mask 1
.amdhsa_float_denorm_mode_16_64 0
.amdhsa_dx10_clamp 0
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack -filetype=obj < %s > %t
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readelf -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
// complete
-// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
+// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000
.amdhsa_kernel complete
.amdhsa_group_segment_fixed_size 1
.amdhsa_private_segment_fixed_size 1
+ .amdhsa_kernarg_size 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 1
.amdhsa_next_free_sgpr 27
.amdhsa_reserve_vcc 0
.amdhsa_reserve_flat_scratch 0
- .amdhsa_reserve_xnack_mask 0
+ .amdhsa_reserve_xnack_mask 1
.amdhsa_float_round_mode_32 1
.amdhsa_float_round_mode_16_64 1
.amdhsa_float_denorm_mode_32 1
// ASM: .amdhsa_kernel complete
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_kernarg_size 8
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
.amdhsa_reserve_flat_scratch 1
.amdhsa_reserve_vcc 0
- .amdhsa_reserve_xnack_mask 0
+ .amdhsa_reserve_xnack_mask 1
.amdhsa_float_denorm_mode_16_64 0
.amdhsa_dx10_clamp 0
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
--- /dev/null
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack -filetype=obj < %s > %t
+// RUN: llvm-readelf -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// READOBJ: Section Headers
+// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
+// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
+
+// READOBJ: Relocation section '.rela.rodata' at offset
+// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
+// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
+// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
+// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310
+
+// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
+// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
+// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
+// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
+// READOBJ-NEXT: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr
+// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
+// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
+// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
+// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd
+
+// OBJDUMP: Contents of section .rodata
+// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
+// minimal
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
+// complete
+// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000
+// special_sgpr
+// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00b0 00010000 80000000 00000000 00000000
+// disabled_user_sgpr
+// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+"
+
+.p2align 8
+.type minimal,@function
+minimal:
+ s_endpgm
+
+.p2align 8
+.type complete,@function
+complete:
+ s_endpgm
+
+.p2align 8
+.type special_sgpr,@function
+special_sgpr:
+ s_endpgm
+
+.p2align 8
+.type disabled_user_sgpr,@function
+disabled_user_sgpr:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+// Test that only specifying required directives is allowed, and that defaulted
+// values are omitted.
+.p2align 6
+.amdhsa_kernel minimal
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel minimal
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM: .end_amdhsa_kernel
+
+// Test that we can specify all available directives with non-default values.
+.p2align 6
+.amdhsa_kernel complete
+ .amdhsa_group_segment_fixed_size 1
+ .amdhsa_private_segment_fixed_size 1
+ .amdhsa_kernarg_size 8
+ .amdhsa_user_sgpr_private_segment_buffer 1
+ .amdhsa_user_sgpr_dispatch_ptr 1
+ .amdhsa_user_sgpr_queue_ptr 1
+ .amdhsa_user_sgpr_kernarg_segment_ptr 1
+ .amdhsa_user_sgpr_dispatch_id 1
+ .amdhsa_user_sgpr_flat_scratch_init 1
+ .amdhsa_user_sgpr_private_segment_size 1
+ .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+ .amdhsa_system_sgpr_workgroup_id_x 0
+ .amdhsa_system_sgpr_workgroup_id_y 1
+ .amdhsa_system_sgpr_workgroup_id_z 1
+ .amdhsa_system_sgpr_workgroup_info 1
+ .amdhsa_system_vgpr_workitem_id 1
+ .amdhsa_next_free_vgpr 9
+ .amdhsa_next_free_sgpr 27
+ .amdhsa_reserve_vcc 0
+ .amdhsa_reserve_flat_scratch 0
+ .amdhsa_reserve_xnack_mask 1
+ .amdhsa_float_round_mode_32 1
+ .amdhsa_float_round_mode_16_64 1
+ .amdhsa_float_denorm_mode_32 1
+ .amdhsa_float_denorm_mode_16_64 0
+ .amdhsa_dx10_clamp 0
+ .amdhsa_ieee_mode 0
+ .amdhsa_fp16_overflow 1
+ .amdhsa_exception_fp_ieee_invalid_op 1
+ .amdhsa_exception_fp_denorm_src 1
+ .amdhsa_exception_fp_ieee_div_zero 1
+ .amdhsa_exception_fp_ieee_overflow 1
+ .amdhsa_exception_fp_ieee_underflow 1
+ .amdhsa_exception_fp_ieee_inexact 1
+ .amdhsa_exception_int_div_zero 1
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel complete
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_kernarg_size 8
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
+// ASM-NEXT: .amdhsa_next_free_vgpr 9
+// ASM-NEXT: .amdhsa_next_free_sgpr 27
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 1
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
+// ASM-NEXT: .amdhsa_dx10_clamp 0
+// ASM-NEXT: .amdhsa_ieee_mode 0
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
+
+// Test that we are including special SGPR usage in the granulated count.
+.p2align 6
+.amdhsa_kernel special_sgpr
+ // Same next_free_sgpr as "complete", but...
+ .amdhsa_next_free_sgpr 27
+ // ...on GFX9 this should require an additional 6 SGPRs, pushing us from
+ // 3 granules to 4
+ .amdhsa_reserve_flat_scratch 1
+
+ .amdhsa_reserve_vcc 0
+ .amdhsa_reserve_xnack_mask 1
+
+ .amdhsa_float_denorm_mode_16_64 0
+ .amdhsa_dx10_clamp 0
+ .amdhsa_ieee_mode 0
+ .amdhsa_next_free_vgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel special_sgpr
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 27
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM: .amdhsa_float_denorm_mode_16_64 0
+// ASM-NEXT: .amdhsa_dx10_clamp 0
+// ASM-NEXT: .amdhsa_ieee_mode 0
+// ASM: .end_amdhsa_kernel
+
+// Test that explicitly disabling user_sgpr's does not affect the user_sgpr
+// count, i.e. this should produce the same descriptor as minimal.
+.p2align 6
+.amdhsa_kernel disabled_user_sgpr
+ .amdhsa_user_sgpr_private_segment_buffer 0
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel disabled_user_sgpr
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM: .end_amdhsa_kernel
+
+.section .foo
+
+.byte .amdgcn.gfx_generation_number
+// ASM: .byte 9
+
+.byte .amdgcn.gfx_generation_minor
+// ASM: .byte 0
+
+.byte .amdgcn.gfx_generation_stepping
+// ASM: .byte 4
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v7, s10
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 8
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 11
+
+.set .amdgcn.next_free_vgpr, 0
+.set .amdgcn.next_free_sgpr, 0
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v16, s3
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 17
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 4
+
+// Metadata
+
+.amdgpu_metadata
+ amdhsa.version:
+ - 3
+ - 0
+ amdhsa.kernels:
+ - .name: amd_kernel_code_t_test_all
+ .symbol: amd_kernel_code_t_test_all@kd
+ .kernarg_segment_size: 8
+ .group_segment_fixed_size: 16
+ .private_segment_fixed_size: 32
+ .kernarg_segment_align: 64
+ .wavefront_size: 128
+ .sgpr_count: 14
+ .vgpr_count: 40
+ .max_flat_workgroup_size: 256
+ - .name: amd_kernel_code_t_minimal
+ .symbol: amd_kernel_code_t_minimal@kd
+ .kernarg_segment_size: 8
+ .group_segment_fixed_size: 16
+ .private_segment_fixed_size: 32
+ .kernarg_segment_align: 64
+ .wavefront_size: 128
+ .sgpr_count: 14
+ .vgpr_count: 40
+ .max_flat_workgroup_size: 256
+.end_amdgpu_metadata
+
+// ASM: .amdgpu_metadata
+// ASM: amdhsa.kernels:
+// ASM: - .group_segment_fixed_size: 16
+// ASM: .kernarg_segment_align: 64
+// ASM: .kernarg_segment_size: 8
+// ASM: .max_flat_workgroup_size: 256
+// ASM: .name: amd_kernel_code_t_test_all
+// ASM: .private_segment_fixed_size: 32
+// ASM: .sgpr_count: 14
+// ASM: .symbol: 'amd_kernel_code_t_test_all@kd'
+// ASM: .vgpr_count: 40
+// ASM: .wavefront_size: 128
+// ASM: - .group_segment_fixed_size: 16
+// ASM: .kernarg_segment_align: 64
+// ASM: .kernarg_segment_size: 8
+// ASM: .max_flat_workgroup_size: 256
+// ASM: .name: amd_kernel_code_t_minimal
+// ASM: .private_segment_fixed_size: 32
+// ASM: .sgpr_count: 14
+// ASM: .symbol: 'amd_kernel_code_t_minimal@kd'
+// ASM: .vgpr_count: 40
+// ASM: .wavefront_size: 128
+// ASM: amdhsa.version:
+// ASM-NEXT: - 3
+// ASM-NEXT: - 0
+// ASM: .end_amdgpu_metadata
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx801 -mattr=-fast-fmaf -show-encoding %s | FileCheck --check-prefix=GFX8 %s
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts,-xnack -show-encoding %s | FileCheck --check-prefix=GFX9 %s
.hsa_code_object_isa
// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
-// GFX10: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"
// RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s
// OSABI-HSA: .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx802"
-// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
-// OSABI-HSA-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
-// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
+// OSABI-UNK-ERR: error: target id must match options
+// OSABI-HSA-ERR: error: target id must match options
+// OSABI-PAL-ERR: error: target id must match options
.amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx802"
// RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s
// OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802"
-// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
-// OSABI-HSA-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
-// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
+// OSABI-UNK-ERR: error: target id must match options
+// OSABI-HSA-ERR: error: target id must match options
+// OSABI-PAL-ERR: error: target id must match options
.amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802"
// RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s
// OSABI-UNK: .amd_amdgpu_isa "amdgcn-amd-unknown--gfx802"
-// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
-// OSABI-HSA-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
-// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
+// OSABI-UNK-ERR: error: target id must match options
+// OSABI-HSA-ERR: error: target id must match options
+// OSABI-PAL-ERR: error: target id must match options
.amd_amdgpu_isa "amdgcn-amd-unknown--gfx802"
-# RUN: llvm-mc -preserve-comments -triple amdgcn-amd-amdhsa %s >%t-1.s
-# RUN: llvm-mc -preserve-comments -triple amdgcn-amd-amdhsa %t-1.s >%t-2.s
+# RUN: llvm-mc -preserve-comments -triple amdgcn-amd- %s >%t-1.s
+# RUN: llvm-mc -preserve-comments -triple amdgcn-amd- %t-1.s >%t-2.s
# RUN: diff %t-1.s %t-2.s
# Test that AMDGPU assembly round-trips when run through MC; the first
# RUN: obj2yaml %t.o.3 | FileCheck --check-prefixes=YAML-SRAM-ECC-XNACK-GFX900 %s
# ELF-SRAM-ECC-NONE: Flags [
-# ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
+# ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
# ELF-SRAM-ECC-NONE-NEXT: ]
# ELF-SRAM-ECC-GFX900: Flags [
+# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
-# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-GFX900-NEXT: ]
# ELF-SRAM-ECC-XNACK-GFX900: Flags [
+# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
+# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
-# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
-# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_XNACK (0x100)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: ]
-# YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_SRAM_ECC ]
-# YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ]
-# YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ]
+# YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
+# YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
+# YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_XNACK_V3, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
# Doc1
--- !ELF
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
- Flags: [ EF_AMDGPU_SRAM_ECC ]
+ Flags: [ EF_AMDGPU_FEATURE_SRAMECC_V3 ]
...
# Doc2
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
- Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ]
+ Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
...
# Doc3
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
- Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ]
+ Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_XNACK_V3, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
...
# RUN: obj2yaml %t.o.2 | FileCheck --check-prefixes=YAML-XNACK-GFX801 %s
# ELF-ALL: Flags [
-# ELF-XNACK-NONE: EF_AMDGPU_XNACK (0x100)
+# ELF-XNACK-NONE: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
+# ELF-XNACK-GFX801: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
# ELF-XNACK-GFX801: EF_AMDGPU_MACH_AMDGCN_GFX801 (0x28)
-# ELF-XNACK-GFX801: EF_AMDGPU_XNACK (0x100)
# ELF-ALL: ]
-# YAML-XNACK-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_XNACK ]
-# YAML-XNACK-GFX801: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_XNACK ]
+# YAML-XNACK-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_FEATURE_XNACK_V3 ]
+# YAML-XNACK-GFX801: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_FEATURE_XNACK_V3 ]
# Doc1
--- !ELF
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
- Flags: [ EF_AMDGPU_XNACK ]
+ Flags: [ EF_AMDGPU_FEATURE_XNACK_V3 ]
...
# Doc2
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
- Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_XNACK ]
+ Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_FEATURE_XNACK_V3 ]
...
my_kernel.kd:
.long 0x00000000 ;; group_segment_fixed_size
.long 0x00000000 ;; private_segment_fixed_size
- .quad 0x00FF000000000000 ;; reserved bytes.
+ .long 0x00000000 ;; kernarg_segment_size.
+ .long 0x00000000 ;; reserved bytes.
.quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works.
;; 20 reserved bytes.
- .quad 0x0000000000000000
+ .quad 0x00FF000000000000 ;; reserved bytes.
.quad 0x0000000000000000
.long 0x00000000
; RUN: split-file %s %t.dir
-; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1
+; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble
+; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble
; RUN: diff %t1 %t1-re-assemble
-; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2
+; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble
+; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble
; RUN: diff %t2 %t2-re-assemble
-; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3
+; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3
; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble
+; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble
; RUN: diff %t3 %t3-re-assemble
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.amdhsa_reserve_flat_scratch 1
- .amdhsa_reserve_xnack_mask 1
+ .amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_vcc 1
.end_amdhsa_kernel
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 35
.amdhsa_reserve_flat_scratch 1
- .amdhsa_reserve_xnack_mask 1
+ .amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_vcc 1
.end_amdhsa_kernel
; RUN: split-file %s %t.dir
-; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1
+; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble
+; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble
; RUN: diff %t1 %t1-re-assemble
-; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2
+; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble
+; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble
; RUN: diff %t2 %t2-re-assemble
-; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3
+; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3
; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \
-; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble
+; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble
; RUN: diff %t3 %t3-re-assemble
;--- 1.s
;; Entirely zeroed kernel descriptor (for GFX10).
-; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -o %t
+; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack -filetype=obj -o %t
; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s
;; TODO:
;; Entirely zeroed kernel descriptor (for GFX9).
-; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1
+; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \
-; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2
+; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: diff %t1 %t2
; RUN: llvm-objdump -s -j .text %t1 | FileCheck --check-prefix=OBJDUMP %s
-; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1
+; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \
-; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2
+; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: llvm-objdump -s -j .text %t2 | FileCheck --check-prefix=OBJDUMP %s
;; Not running lit-test over gfx10 (see kd-zeroed-gfx10.s for details).
-# RUN: yaml2obj %s -o %t -DCPU=GFX600
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX600 -DFLAGS=0x20
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600 -DFLAG_VALUE=0x20
-# RUN: yaml2obj %s -o %t -DCPU=GFX601
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX601 -DFLAGS=0x21
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600 -DFLAG_VALUE=0x20
-# RUN: yaml2obj %s -o %t -DCPU=GFX602
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX602 -DFLAGS=0x3A
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600 -DFLAG_VALUE=0x20
-# RUN: yaml2obj %s -o %t -DCPU=GFX700
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX700 -DFLAGS=0x22
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601 -DFLAG_VALUE=0x21
-# RUN: yaml2obj %s -o %t -DCPU=GFX701
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX701 -DFLAGS=0x23
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601 -DFLAG_VALUE=0x21
-# RUN: yaml2obj %s -o %t -DCPU=GFX702
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX702 -DFLAGS=0x24
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601 -DFLAG_VALUE=0x21
-# RUN: yaml2obj %s -o %t -DCPU=GFX703
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX703 -DFLAGS=0x25
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602 -DFLAG_VALUE=0x3A
-# RUN: yaml2obj %s -o %t -DCPU=GFX704
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX704 -DFLAGS=0x26
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602 -DFLAG_VALUE=0x3A
-# RUN: yaml2obj %s -o %t -DCPU=GFX705
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX705 -DFLAGS=0x3B
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602 -DFLAG_VALUE=0x3A
-# RUN: yaml2obj %s -o %t -DCPU=GFX801
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX801 -DFLAGS=0x28
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700 -DFLAG_VALUE=0x22
-# RUN: yaml2obj %s -o %t -DCPU=GFX802
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX802 -DFLAGS=0x29
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700 -DFLAG_VALUE=0x22
-# RUN: yaml2obj %s -o %t -DCPU=GFX803
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX803 -DFLAGS=0x2A
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700 -DFLAG_VALUE=0x22
-# RUN: yaml2obj %s -o %t -DCPU=GFX805
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX805 -DFLAGS=0x3C
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701 -DFLAG_VALUE=0x23
-# RUN: yaml2obj %s -o %t -DCPU=GFX810
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX810 -DFLAGS=0x2B
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701 -DFLAG_VALUE=0x23
-# RUN: yaml2obj %s -o %t -DCPU=GFX900
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX900 -DFLAGS=0x2C
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701 -DFLAG_VALUE=0x23
-# RUN: yaml2obj %s -o %t -DCPU=GFX902
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX902 -DFLAGS=0x2D
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702 -DFLAG_VALUE=0x24
-# RUN: yaml2obj %s -o %t -DCPU=GFX904
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX904 -DFLAGS=0x2E
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702 -DFLAG_VALUE=0x24
-# RUN: yaml2obj %s -o %t -DCPU=GFX906
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX906 -DFLAGS=0x2F
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702 -DFLAG_VALUE=0x24
-# RUN: yaml2obj %s -o %t -DCPU=GFX908
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX908 -DFLAGS=0x30
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703 -DFLAG_VALUE=0x25
-# RUN: yaml2obj %s -o %t -DCPU=GFX909
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX909 -DFLAGS=0x31
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703 -DFLAG_VALUE=0x25
-# RUN: yaml2obj %s -o %t -DCPU=GFX90A
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX90A -DFLAGS=0x3F
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703 -DFLAG_VALUE=0x25
-# RUN: yaml2obj %s -o %t -DCPU=GFX90C
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX90C -DFLAGS=0x32
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704 -DFLAG_VALUE=0x26
-# RUN: yaml2obj %s -o %t -DCPU=GFX1010
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1010 -DFLAGS=0x33
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704 -DFLAG_VALUE=0x26
-# RUN: yaml2obj %s -o %t -DCPU=GFX1011
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1011 -DFLAGS=0x34
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704 -DFLAG_VALUE=0x26
-# RUN: yaml2obj %s -o %t -DCPU=GFX1012
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1012 -DFLAGS=0x35
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705 -DFLAG_VALUE=0x3B
-# RUN: yaml2obj %s -o %t -DCPU=GFX1030
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1030 -DFLAGS=0x36
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705 -DFLAG_VALUE=0x3B
-# RUN: yaml2obj %s -o %t -DCPU=GFX1031
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1031 -DFLAGS=0x37
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705 -DFLAG_VALUE=0x3B
-# RUN: yaml2obj %s -o %t -DCPU=GFX1032
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1032 -DFLAGS=0x38
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801 -DFLAG_VALUE=0x28
-# RUN: yaml2obj %s -o %t -DCPU=GFX1033
-# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1033 -DFLAGS=0x39
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801 -DFLAG_VALUE=0x28
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801 -DFLAG_VALUE=0x28
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802 -DFLAG_VALUE=0x29
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802 -DFLAG_VALUE=0x29
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802 -DFLAG_VALUE=0x29
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803 -DFLAG_VALUE=0x2A
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803 -DFLAG_VALUE=0x2A
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803 -DFLAG_VALUE=0x2A
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805 -DFLAG_VALUE=0x3C
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805 -DFLAG_VALUE=0x3C
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805 -DFLAG_VALUE=0x3C
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810 -DFLAG_VALUE=0x2B
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810 -DFLAG_VALUE=0x2B
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810 -DFLAG_VALUE=0x2B
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900 -DFLAG_VALUE=0x2C
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900 -DFLAG_VALUE=0x2C
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900 -DFLAG_VALUE=0x2C
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902 -DFLAG_VALUE=0x2D
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902 -DFLAG_VALUE=0x2D
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902 -DFLAG_VALUE=0x2D
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904 -DFLAG_VALUE=0x2E
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904 -DFLAG_VALUE=0x2E
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904 -DFLAG_VALUE=0x2E
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906 -DFLAG_VALUE=0x2F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906 -DFLAG_VALUE=0x2F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906 -DFLAG_VALUE=0x2F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908 -DFLAG_VALUE=0x30
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908 -DFLAG_VALUE=0x30
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908 -DFLAG_VALUE=0x30
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909 -DFLAG_VALUE=0x31
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909 -DFLAG_VALUE=0x31
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909 -DFLAG_VALUE=0x31
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A -DFLAG_VALUE=0x3F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A -DFLAG_VALUE=0x3F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A -DFLAG_VALUE=0x3F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C -DFLAG_VALUE=0x32
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C -DFLAG_VALUE=0x32
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C -DFLAG_VALUE=0x32
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011 -DFLAG_VALUE=0x34
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011 -DFLAG_VALUE=0x34
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011 -DFLAG_VALUE=0x34
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 -DFLAG_VALUE=0x35
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 -DFLAG_VALUE=0x35
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 -DFLAG_VALUE=0x35
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 -DFLAG_VALUE=0x36
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 -DFLAG_VALUE=0x36
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 -DFLAG_VALUE=0x36
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031 -DFLAG_VALUE=0x37
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031 -DFLAG_VALUE=0x37
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031 -DFLAG_VALUE=0x37
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032 -DFLAG_VALUE=0x38
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032 -DFLAG_VALUE=0x38
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032 -DFLAG_VALUE=0x38
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033 -DFLAG_VALUE=0x39
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033 -DFLAG_VALUE=0x39
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033 -DFLAG_VALUE=0x39
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3"
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3"
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_V3"
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x23F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_V3"
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x23F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_ANY_V4"
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_OFF_V4"
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x23F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_ON_V4"
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x33F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_ANY_V4"
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 (0x400)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x43F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_OFF_V4"
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 (0x800)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x83F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_ON_V4"
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_ON_V4 (0xC00)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0xC3F
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
OSABI: ELFOSABI_AMDGPU_HSA
+ ABIVersion: [[ABI_VERSION]]
Type: ET_REL
Machine: EM_AMDGPU
- Flags: [ EF_AMDGPU_MACH_AMDGCN_[[CPU]] ]
-
-# CHECK: File: [[FILE]]
-# CHECK-NEXT: Format: elf64-amdgpu
-# CHECK-NEXT: Arch: amdgcn
-# CHECK-NEXT: AddressSize: 64bit
-# CHECK-NEXT: LoadName: <Not found>
-# CHECK-NEXT: ElfHeader {
-# CHECK-NEXT: Ident {
-# CHECK-NEXT: Magic: (7F 45 4C 46)
-# CHECK-NEXT: Class: 64-bit (0x2)
-# CHECK-NEXT: DataEncoding: LittleEndian (0x1)
-# CHECK-NEXT: FileVersion: 1
-# CHECK-NEXT: OS/ABI: AMDGPU_HSA (0x40)
-# CHECK-NEXT: ABIVersion: 0
-# CHECK-NEXT: Unused: (00 00 00 00 00 00 00)
-# CHECK-NEXT: }
-# CHECK-NEXT: Type: Relocatable (0x1)
-# CHECK-NEXT: Machine: EM_AMDGPU (0xE0)
-# CHECK-NEXT: Version: 1
-# CHECK-NEXT: Entry: 0x0
-# CHECK-NEXT: ProgramHeaderOffset: 0x0
-# CHECK-NEXT: SectionHeaderOffset: 0x58
-# CHECK-NEXT: Flags [ ([[FLAGS]])
-# CHECK-NEXT: EF_AMDGPU_MACH_AMDGCN_[[CPU]] ([[FLAGS]])
-# CHECK-NEXT: ]
-# CHECK-NEXT: HeaderSize: 64
-# CHECK-NEXT: ProgramHeaderEntrySize: 0
-# CHECK-NEXT: ProgramHeaderCount: 0
-# CHECK-NEXT: SectionHeaderEntrySize: 64
-# CHECK-NEXT: SectionHeaderCount: 3
-# CHECK-NEXT: StringTableSectionIndex: 2
-# CHECK-NEXT: }
+ Flags: [ [[FLAG_NAME]] ]
+
+# ALL: File: [[FILE]]
+# ALL-NEXT: Format: elf64-amdgpu
+# ALL-NEXT: Arch: amdgcn
+# ALL-NEXT: AddressSize: 64bit
+# ALL-NEXT: LoadName: <Not found>
+# ALL-NEXT: ElfHeader {
+# ALL-NEXT: Ident {
+# ALL-NEXT: Magic: (7F 45 4C 46)
+# ALL-NEXT: Class: 64-bit (0x2)
+# ALL-NEXT: DataEncoding: LittleEndian (0x1)
+# ALL-NEXT: FileVersion: 1
+# ALL-NEXT: OS/ABI: AMDGPU_HSA (0x40)
+# ALL-NEXT: ABIVersion: [[ABI_VERSION]]
+# ALL-NEXT: Unused: (00 00 00 00 00 00 00)
+# ALL-NEXT: }
+# ALL-NEXT: Type: Relocatable (0x1)
+# ALL-NEXT: Machine: EM_AMDGPU (0xE0)
+# ALL-NEXT: Version: 1
+# ALL-NEXT: Entry: 0x0
+# ALL-NEXT: ProgramHeaderOffset: 0x0
+# ALL-NEXT: SectionHeaderOffset: 0x58
+# KNOWN-ABI-VERSION-NEXT: Flags [ ([[FLAG_VALUE]])
+# SINGLE-FLAG-NEXT: [[FLAG_NAME]] ([[FLAG_VALUE]])
+# DOUBLE-FLAG-NEXT: [[FLAG_0]]
+# DOUBLE-FLAG-NEXT: [[FLAG_1]]
+# KNOWN-ABI-VERSION-NEXT: ]
+# UNKNOWN-ABI-VERSION-NEXT: Flags: [[FLAG_VALUE]]
+# ALL-NEXT: HeaderSize: 64
+# ALL-NEXT: ProgramHeaderEntrySize: 0
+# ALL-NEXT: ProgramHeaderCount: 0
+# ALL-NEXT: SectionHeaderEntrySize: 64
+# ALL-NEXT: SectionHeaderCount: 3
+# ALL-NEXT: StringTableSectionIndex: 2
+# ALL-NEXT: }
// GNU: Displaying notes found in: .note.no.desc
// GNU-NEXT: Owner Data size Description
-// GNU-NEXT: AMD 0x00000000 NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
-// GNU-NEXT: HSA Metadata:
+// GNU-NEXT: AMD 0x00000000 NT_AMD_HSA_METADATA (AMD HSA Metadata)
+// GNU-NEXT: AMD HSA Metadata:
// GNU-NEXT: {{^ $}}
-// GNU-NEXT: AMD 0x00000000 NT_AMD_AMDGPU_ISA (ISA Version)
-// GNU-NEXT: ISA Version:
+// GNU-NEXT: AMD 0x00000000 NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
+// GNU-NEXT: AMD HSA ISA Name:
// GNU-NEXT: {{^ $}}
// GNU-EMPTY:
// GNU-NEXT: Displaying notes found in: .note.desc
// GNU-NEXT: Owner Data size Description
-// GNU-NEXT: AMD 0x0000000a NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
-// GNU-NEXT: HSA Metadata:
+// GNU-NEXT: AMD 0x0000000a NT_AMD_HSA_METADATA (AMD HSA Metadata)
+// GNU-NEXT: AMD HSA Metadata:
// GNU-NEXT: meta_blah
-// GNU-NEXT: AMD 0x00000009 NT_AMD_AMDGPU_ISA (ISA Version)
-// GNU-NEXT: ISA Version:
+// GNU-NEXT: AMD 0x00000009 NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
+// GNU-NEXT: AMD HSA ISA Name:
// GNU-NEXT: isa_blah
// GNU-EMPTY:
// GNU-NEXT: Displaying notes found in: .note.other
// GNU-NEXT: Owner Data size Description
-// GNU-NEXT: AMD 0x00000000 NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)
+// GNU-NEXT: AMD 0x00000000 NT_AMD_PAL_METADATA (AMD PAL Metadata)
+// GNU-NEXT: AMD PAL Metadata:
+// GNU-NEXT: {{^ $}}
// GNU-EMPTY:
// GNU-NEXT: Displaying notes found in: .note.unknown
// GNU-NEXT: Owner Data size Description
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0x0
-// LLVM-NEXT: Type: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
-// LLVM-NEXT: HSA Metadata:
+// LLVM-NEXT: Type: NT_AMD_HSA_METADATA (AMD HSA Metadata)
+// LLVM-NEXT: AMD HSA Metadata:
// LLVM-NEXT: }
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0x0
-// LLVM-NEXT: Type: NT_AMD_AMDGPU_ISA (ISA Version)
-// LLVM-NEXT: ISA Version:
+// LLVM-NEXT: Type: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
+// LLVM-NEXT: AMD HSA ISA Name:
// LLVM-NEXT: }
// LLVM-NEXT: }
// LLVM-NEXT: NoteSection {
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0xA
-// LLVM-NEXT: Type: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
-// LLVM-NEXT: HSA Metadata: meta_blah
+// LLVM-NEXT: Type: NT_AMD_HSA_METADATA (AMD HSA Metadata)
+// LLVM-NEXT: AMD HSA Metadata: meta_blah
// LLVM-NEXT: }
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0x9
-// LLVM-NEXT: Type: NT_AMD_AMDGPU_ISA (ISA Version)
-// LLVM-NEXT: ISA Version: isa_blah
+// LLVM-NEXT: Type: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
+// LLVM-NEXT: AMD HSA ISA Name: isa_blah
// LLVM-NEXT: }
// LLVM-NEXT: }
// LLVM-NEXT: NoteSection {
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0x0
-// LLVM-NEXT: Type: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)
+// LLVM-NEXT: Type: NT_AMD_PAL_METADATA (AMD PAL Metadata)
+// LLVM-NEXT: AMD PAL Metadata:
// LLVM-NEXT: }
// LLVM-NEXT: }
// LLVM-NEXT: NoteSection {
.align 4
.long 4 /* namesz */
.long 0 /* descsz */
- .long 10 /* type = NT_AMD_AMDGPU_HSA_METADATA */
+ .long 10 /* type = NT_AMD_HSA_METADATA */
.asciz "AMD"
.long 4 /* namesz */
.long 0 /* descsz */
- .long 11 /* type = NT_AMD_AMDGPU_ISA */
+ .long 11 /* type = NT_AMD_HSA_ISA_NAME */
.asciz "AMD"
.section ".note.desc", "a"
.align 4
.long 4 /* namesz */
.long end.meta - begin.meta /* descsz */
- .long 10 /* type = NT_AMD_AMDGPU_HSA_METADATA */
+ .long 10 /* type = NT_AMD_HSA_METADATA */
.asciz "AMD"
begin.meta:
.asciz "meta_blah"
.align 4
.long 4 /* namesz */
.long end.isa - begin.isa /* descsz */
- .long 11 /* type = NT_AMD_AMDGPU_ISA */
+ .long 11 /* type = NT_AMD_HSA_ISA_NAME */
.asciz "AMD"
begin.isa:
.asciz "isa_blah"
.align 4
.long 4 /* namesz */
.long 0 /* descsz */
- .long 12 /* type = NT_AMD_AMDGPU_PAL_METADATA */
+ .long 12 /* type = NT_AMD_PAL_METADATA */
.asciz "AMD"
.section ".note.unknown", "a"
.align 4
ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6")
};
-static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
+static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_V3),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3)
+};
+
+// Name table for decoding the e_flags field of AMDGPU ELF headers that use
+// the code object V4 flag layout: the EF_AMDGPU_MACH_* machine values first,
+// then the V4 XNACK and SRAMECC feature settings.
+// NOTE(review): presumably selected for ELFABIVERSION_AMDGPU_HSA_V4
+// (ABIVersion 2); the code that picks a table is outside this hunk - confirm.
+static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RS880),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV670),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV710),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV730),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV770),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CEDAR),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CYPRESS),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_JUNIPER),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_REDWOOD),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_SUMO),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_BARTS),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CAICOS),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CAYMAN),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_TURKS),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX600),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX601),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX602),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX700),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX701),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX702),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX703),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX704),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX705),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX801),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX802),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX803),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX805),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX810),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX900),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX908),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1030),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033),
+ // Feature settings. Each feature has ANY, OFF and ON entries; the values in
+ // the trailing comments are the ones the header-flags test expects.
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ANY_V4), // 0x100
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_OFF_V4), // 0x200
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ON_V4), // 0x300
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_ANY_V4), // 0x400
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_OFF_V4), // 0x800
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_ON_V4) // 0xC00
};
static const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
switch (NoteType) {
default:
return {"", ""};
- case ELF::NT_AMD_AMDGPU_HSA_METADATA:
+  // Code object version note: the descriptor is exactly two 32-bit words,
+  // the major version followed by the minor version.
+  case ELF::NT_AMD_HSA_CODE_OBJECT_VERSION: {
+    struct CodeObjectVersion {
+      uint32_t MajorVersion;
+      uint32_t MinorVersion;
+    };
+    // Anything that is not exactly one CodeObjectVersion record is rejected.
+    if (Desc.size() != sizeof(CodeObjectVersion))
+      return {"AMD HSA Code Object Version",
+              "Invalid AMD HSA Code Object Version"};
+    auto Version = reinterpret_cast<const CodeObjectVersion *>(Desc.data());
+    std::string VersionString;
+    raw_string_ostream StrOS(VersionString);
+    StrOS << "[Major: " << Version->MajorVersion
+          << ", Minor: " << Version->MinorVersion << "]";
+    // str() flushes the stream first, so the returned text is complete even
+    // when raw_string_ostream buffers its output.
+    return {"AMD HSA Code Object Version", StrOS.str()};
+  }
+  // HSAIL properties note: two 32-bit version words followed by three
+  // single-byte properties.
+  case ELF::NT_AMD_HSA_HSAIL: {
+    struct HSAILProperties {
+      uint32_t HSAILMajorVersion;
+      uint32_t HSAILMinorVersion;
+      uint8_t Profile;
+      uint8_t MachineModel;
+      uint8_t DefaultFloatRound;
+    };
+    // The descriptor must match the record size exactly.
+    if (Desc.size() != sizeof(HSAILProperties))
+      return {"AMD HSA HSAIL Properties", "Invalid AMD HSA HSAIL Properties"};
+    auto Properties = reinterpret_cast<const HSAILProperties *>(Desc.data());
+    std::string HSAILPropertiesString;
+    raw_string_ostream StrOS(HSAILPropertiesString);
+    // The byte-sized fields are widened before streaming: raw_ostream's
+    // operator<< for unsigned char writes the raw character, not its
+    // numeric value.
+    StrOS << "[HSAIL Major: " << Properties->HSAILMajorVersion
+          << ", HSAIL Minor: " << Properties->HSAILMinorVersion
+          << ", Profile: " << static_cast<uint32_t>(Properties->Profile)
+          << ", Machine Model: "
+          << static_cast<uint32_t>(Properties->MachineModel)
+          << ", Default Float Round: "
+          << static_cast<uint32_t>(Properties->DefaultFloatRound) << "]";
+    // str() flushes the stream first, so the returned text is complete even
+    // when raw_string_ostream buffers its output.
+    return {"AMD HSA HSAIL Properties", StrOS.str()};
+  }
+  // ISA version note: a fixed header followed by the vendor name and then
+  // the architecture name, whose byte lengths are given in the header.
+  case ELF::NT_AMD_HSA_ISA_VERSION: {
+    struct IsaVersion {
+      uint16_t VendorNameSize;
+      uint16_t ArchitectureNameSize;
+      uint32_t Major;
+      uint32_t Minor;
+      uint32_t Stepping;
+    };
+    // The fixed header must be present before its size fields can be read.
+    if (Desc.size() < sizeof(IsaVersion))
+      return {"AMD HSA ISA Version", "Invalid AMD HSA ISA Version"};
+    auto Isa = reinterpret_cast<const IsaVersion *>(Desc.data());
+    // Both names must fit inside the descriptor and be non-empty; a zero
+    // size would make the "size - 1" lengths below wrap around.
+    if (Desc.size() < sizeof(IsaVersion) + Isa->VendorNameSize +
+                          Isa->ArchitectureNameSize ||
+        Isa->VendorNameSize == 0 || Isa->ArchitectureNameSize == 0)
+      return {"AMD HSA ISA Version", "Invalid AMD HSA ISA Version"};
+    // The last byte of each name is dropped (presumably a NUL terminator).
+    const char *Names =
+        reinterpret_cast<const char *>(Desc.data()) + sizeof(IsaVersion);
+    StringRef Vendor(Names, Isa->VendorNameSize - 1);
+    StringRef Architecture(Names + Isa->VendorNameSize,
+                           Isa->ArchitectureNameSize - 1);
+    std::string IsaString;
+    raw_string_ostream StrOS(IsaString);
+    StrOS << "[Vendor: " << Vendor << ", Architecture: " << Architecture
+          << ", Major: " << Isa->Major << ", Minor: " << Isa->Minor
+          << ", Stepping: " << Isa->Stepping << "]";
+    // str() flushes the stream first, so the returned text is complete even
+    // when raw_string_ostream buffers its output.
+    return {"AMD HSA ISA Version", StrOS.str()};
+  }
+ case ELF::NT_AMD_HSA_METADATA: { // Descriptor bytes are reported as text.
+ if (Desc.size() == 0) // Guard: keeps the "size() - 1" below from wrapping.
+ return {"AMD HSA Metadata", ""};
return {
- "HSA Metadata",
- std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size())};
- case ELF::NT_AMD_AMDGPU_ISA:
+ "AMD HSA Metadata",
+ std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size() - 1)}; // Drops the final byte (presumably a NUL terminator; confirm).
+ }
+ case ELF::NT_AMD_HSA_ISA_NAME: { // Descriptor bytes are reported as text.
+ if (Desc.size() == 0) // Empty descriptor: report an empty value.
+ return {"AMD HSA ISA Name", ""};
return {
- "ISA Version",
+ "AMD HSA ISA Name", // NOTE(review): unlike NT_AMD_HSA_METADATA, the full descriptor is used here and no trailing byte is dropped; confirm this is intended.
std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size())};
}
+  // PAL metadata note: the descriptor is read as an array of 32-bit
+  // key/value pairs, each printed as "[key: value]".
+  case ELF::NT_AMD_PAL_METADATA: {
+    struct PALMetadata {
+      uint32_t Key;
+      uint32_t Value;
+    };
+    auto Entries = reinterpret_cast<const PALMetadata *>(Desc.data());
+    std::string MetadataString;
+    raw_string_ostream StrOS(MetadataString);
+    // Only whole entries are printed; trailing bytes that do not form a
+    // complete pair are ignored. The index (not the bound) is advanced so
+    // the loop terminates and never reads past the descriptor.
+    for (size_t I = 0, E = Desc.size() / sizeof(PALMetadata); I < E; ++I)
+      StrOS << "[" << Entries[I].Key << ": " << Entries[I].Value << "]";
+    // str() flushes the stream first, so the returned text is complete even
+    // when raw_string_ostream buffers its output.
+    return {"AMD PAL Metadata", StrOS.str()};
+  }
+ }
}
struct AMDGPUNote {
return {"", ""};
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
- std::string HSAMetadataString;
+ std::string MetadataString;
if (!Verifier.verify(MsgPackDoc.getRoot()))
- HSAMetadataString = "Invalid AMDGPU Metadata\n";
+ MetadataString = "Invalid AMDGPU Metadata\n";
- raw_string_ostream StrOS(HSAMetadataString);
+ raw_string_ostream StrOS(MetadataString);
if (MsgPackDoc.getRoot().isScalar()) {
// TODO: passing a scalar root to toYAML() asserts:
// (PolymorphicTraits<T>::getKind(Val) != NodeKind::Scalar &&
};
static const NoteType AMDNoteTypes[] = {
- {ELF::NT_AMD_AMDGPU_HSA_METADATA,
- "NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)"},
- {ELF::NT_AMD_AMDGPU_ISA, "NT_AMD_AMDGPU_ISA (ISA Version)"},
- {ELF::NT_AMD_AMDGPU_PAL_METADATA,
- "NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)"},
+ {ELF::NT_AMD_HSA_CODE_OBJECT_VERSION, // Each entry pairs a note type constant with its display string: the enumerator name followed by a description in parentheses.
+ "NT_AMD_HSA_CODE_OBJECT_VERSION (AMD HSA Code Object Version)"},
+ {ELF::NT_AMD_HSA_HSAIL, "NT_AMD_HSA_HSAIL (AMD HSA HSAIL Properties)"},
+ {ELF::NT_AMD_HSA_ISA_VERSION, "NT_AMD_HSA_ISA_VERSION (AMD HSA ISA Version)"},
+ {ELF::NT_AMD_HSA_METADATA, "NT_AMD_HSA_METADATA (AMD HSA Metadata)"},
+ {ELF::NT_AMD_HSA_ISA_NAME, "NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)"},
+ {ELF::NT_AMD_PAL_METADATA, "NT_AMD_PAL_METADATA (AMD PAL Metadata)"},
};
static const NoteType AMDGPUNoteTypes[] = {
W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderMipsFlags),
unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI),
unsigned(ELF::EF_MIPS_MACH));
- else if (E.e_machine == EM_AMDGPU)
- W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderAMDGPUFlags),
- unsigned(ELF::EF_AMDGPU_MACH));
- else if (E.e_machine == EM_RISCV)
+ else if (E.e_machine == EM_AMDGPU) {
+ switch (E.e_ident[ELF::EI_ABIVERSION]) {
+ default:
+ W.printHex("Flags", E.e_flags);
+ break;
+ case 0:
+ // ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
+ LLVM_FALLTHROUGH;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
+ W.printFlags("Flags", E.e_flags,
+ makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion3),
+ unsigned(ELF::EF_AMDGPU_MACH));
+ break;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ W.printFlags("Flags", E.e_flags,
+ makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion4),
+ unsigned(ELF::EF_AMDGPU_MACH),
+ unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4),
+ unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4));
+ break;
+ }
+ } else if (E.e_machine == EM_RISCV)
W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderRISCVFlags));
else
W.printFlags("Flags", E.e_flags);