From 69c8aa26d8cc1014a791611be72c7c1ee5a3d336 Mon Sep 17 00:00:00 2001 From: Sam Kolton Date: Mon, 19 Dec 2016 11:43:15 +0000 Subject: [PATCH] AMDGPU: [AMDGPU] Assembler: add .amdgpu_runtime_metadata directive for runtime metadata V2.0 Summary: Added a pair of directives .amdgpu_runtime_metadata/.end_amdgpu_runtime_metadata. Between them the user can put a YAML string that is put directly into the generated note. E.g.: ''' .amdgpu_runtime_metadata { amd.MDVersion: [ 2, 0 ] } .end_amdgpu_runtime_metadata ''' Based on D25046 Reviewers: vpykhtin, nhaustov, yaxunl, tstellarAMD Subscribers: arsenm, kzhuravl, wdng, nhaehnle, mgorny, tony-tye Differential Revision: https://reviews.llvm.org/D27619 llvm-svn: 290097 --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 2 +- .../Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 47 +++++++ .../AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 146 +++++++++++---------- .../AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h | 20 ++- llvm/test/MC/AMDGPU/hsa.s | 23 +++- llvm/test/MC/AMDGPU/metadata.s | 35 +++++ 6 files changed, 200 insertions(+), 73 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/metadata.s diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 5402298..a8e6902 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -119,7 +119,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { "AMD", "AMDGPU"); // Emit runtime metadata. 
- TS->emitRuntimeMetadata(M); + TS->EmitRuntimeMetadata(M); } bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index ef3f502..545fed1 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -45,6 +46,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/MathExtras.h" #include #include #include @@ -678,6 +681,7 @@ private: bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); bool ParseDirectiveHSACodeObjectVersion(); bool ParseDirectiveHSACodeObjectISA(); + bool ParseDirectiveRuntimeMetadata(); bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); bool ParseDirectiveAMDKernelCodeT(); bool ParseSectionDirectiveHSAText(); @@ -1747,6 +1751,46 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { return false; } +bool AMDGPUAsmParser::ParseDirectiveRuntimeMetadata() { + std::string Metadata; + raw_string_ostream MS(Metadata); + + getLexer().setSkipSpace(false); + + bool FoundEnd = false; + while (!getLexer().is(AsmToken::Eof)) { + while (getLexer().is(AsmToken::Space)) { + MS << ' '; + Lex(); + } + + if (getLexer().is(AsmToken::Identifier)) { + StringRef ID = getLexer().getTok().getIdentifier(); + if (ID == ".end_amdgpu_runtime_metadata") { + Lex(); + FoundEnd = true; + break; + } + } + + MS << Parser.parseStringToEndOfStatement() + << getContext().getAsmInfo()->getSeparatorString(); + + Parser.eatToEndOfStatement(); + } + + getLexer().setSkipSpace(true); + + if 
(getLexer().is(AsmToken::Eof) && !FoundEnd) + return TokError("expected directive .end_amdgpu_runtime_metadata not found"); + + MS.flush(); + + getTargetStreamer().EmitRuntimeMetadata(Metadata); + + return false; +} + bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header) { SmallString<40> ErrStr; @@ -1853,6 +1897,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".hsa_code_object_isa") return ParseDirectiveHSACodeObjectISA(); + if (IDVal == ".amdgpu_runtime_metadata") + return ParseDirectiveRuntimeMetadata(); + if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 7eef5ed..3392183 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -93,6 +93,18 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal( OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; } +void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) { + OS << "\t.amdgpu_runtime_metadata\n"; + OS << getRuntimeMDYAMLString(M); + OS << "\n\t.end_amdgpu_runtime_metadata\n"; +} + +void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(StringRef Metadata) { + OS << "\t.amdgpu_runtime_metadata"; + OS << Metadata; + OS << "\t.end_amdgpu_runtime_metadata\n"; +} + //===----------------------------------------------------------------------===// // AMDGPUTargetELFStreamer //===----------------------------------------------------------------------===// @@ -105,25 +117,39 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { } void +AMDGPUTargetELFStreamer::EmitAMDGPUNote(const MCExpr* DescSZ, + PT_NOTE::NoteType Type, + std::function EmitDesc) { + auto &S = getStreamer(); + auto &Context = S.getContext(); + + auto NameSZ = sizeof(PT_NOTE::NoteName); + + S.PushSection(); + 
S.SwitchSection(Context.getELFSection( + PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); + S.EmitIntValue(NameSZ, 4); // namesz + S.EmitValue(DescSZ, 4); // descz + S.EmitIntValue(Type, 4); // type + S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name + S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 + EmitDesc(S); // desc + S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 + S.PopSection(); +} + +void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) { - MCStreamer &OS = getStreamer(); - MCSectionELF *Note = - OS.getContext().getELFSection(PT_NOTE::SectionName, ELF::SHT_NOTE, - ELF::SHF_ALLOC); - auto NameSZ = sizeof(PT_NOTE::NoteName); - OS.PushSection(); - OS.SwitchSection(Note); - OS.EmitIntValue(NameSZ, 4); // namesz - OS.EmitIntValue(8, 4); // descz - OS.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type - OS.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name - OS.EmitValueToAlignment(4); - OS.EmitIntValue(Major, 4); // desc - OS.EmitIntValue(Minor, 4); - OS.EmitValueToAlignment(4); - OS.PopSection(); + EmitAMDGPUNote( + MCConstantExpr::create(8, getContext()), + PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, + [&](MCELFStreamer &OS){ + OS.EmitIntValue(Major, 4); + OS.EmitIntValue(Minor, 4); + } + ); } void @@ -132,36 +158,28 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Stepping, StringRef VendorName, StringRef ArchName) { - MCStreamer &OS = getStreamer(); - MCSectionELF *Note = - OS.getContext().getELFSection(PT_NOTE::SectionName, ELF::SHT_NOTE, - ELF::SHF_ALLOC); - uint16_t VendorNameSize = VendorName.size() + 1; uint16_t ArchNameSize = ArchName.size() + 1; + unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) + - sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + - VendorNameSize + ArchNameSize; - - OS.PushSection(); - OS.SwitchSection(Note); - auto NameSZ = sizeof(PT_NOTE::NoteName); - OS.EmitIntValue(NameSZ, 4); // namesz - 
OS.EmitIntValue(DescSZ, 4); // descsz - OS.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_ISA, 4); // type - OS.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name - OS.EmitValueToAlignment(4); - OS.EmitIntValue(VendorNameSize, 2); // desc - OS.EmitIntValue(ArchNameSize, 2); - OS.EmitIntValue(Major, 4); - OS.EmitIntValue(Minor, 4); - OS.EmitIntValue(Stepping, 4); - OS.EmitBytes(VendorName); - OS.EmitIntValue(0, 1); // NULL terminate VendorName - OS.EmitBytes(ArchName); - OS.EmitIntValue(0, 1); // NULL terminte ArchName - OS.EmitValueToAlignment(4); - OS.PopSection(); + sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + + VendorNameSize + ArchNameSize; + + EmitAMDGPUNote( + MCConstantExpr::create(DescSZ, getContext()), + PT_NOTE::NT_AMDGPU_HSA_ISA, + [&](MCELFStreamer &OS) { + OS.EmitIntValue(VendorNameSize, 2); + OS.EmitIntValue(ArchNameSize, 2); + OS.EmitIntValue(Major, 4); + OS.EmitIntValue(Minor, 4); + OS.EmitIntValue(Stepping, 4); + OS.EmitBytes(VendorName); + OS.EmitIntValue(0, 1); // NULL terminate VendorName + OS.EmitBytes(ArchName); + OS.EmitIntValue(0, 1); // NULL terminte ArchName + } + ); } void @@ -198,35 +216,27 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal( Symbol->setBinding(ELF::STB_GLOBAL); } -void AMDGPUTargetELFStreamer::emitRuntimeMetadata(Module &M) { - auto &S = getStreamer(); - auto &Context = S.getContext(); - - auto NameSZ = sizeof(PT_NOTE::NoteName); // Size of note name including trailing null. - - S.PushSection(); - S.SwitchSection(Context.getELFSection( - PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); - +void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) { // Create two labels to mark the beginning and end of the desc field // and a MCExpr to calculate the size of the desc field. 
+ auto &Context = getContext(); auto *DescBegin = Context.createTempSymbol(); auto *DescEnd = Context.createTempSymbol(); auto *DescSZ = MCBinaryExpr::createSub( - MCSymbolRefExpr::create(DescEnd, Context), - MCSymbolRefExpr::create(DescBegin, Context), Context); + MCSymbolRefExpr::create(DescEnd, Context), + MCSymbolRefExpr::create(DescBegin, Context), Context); + + EmitAMDGPUNote( + DescSZ, + PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA, + [&](MCELFStreamer &OS) { + OS.EmitLabel(DescBegin); + OS.EmitBytes(Metadata); + OS.EmitLabel(DescEnd); + } + ); +} - // Emit the note element for runtime metadata. - // Name and desc should be padded to 4 byte boundary but size of name and - // desc should not include padding 0's. - S.EmitIntValue(NameSZ, 4); // namesz - S.EmitValue(DescSZ, 4); // descz - S.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA, 4); // type - S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name - S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 - S.EmitLabel(DescBegin); - S.EmitBytes(getRuntimeMDYAMLString(M)); // desc - S.EmitLabel(DescEnd); - S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 - S.PopSection(); +void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) { + EmitRuntimeMetadata(getRuntimeMDYAMLString(M)); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 530ab9a..e2f2058 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -14,6 +14,7 @@ #include "llvm/MC/MCStreamer.h" namespace llvm { +#include "AMDGPUPTNote.h" class DataLayout; class Function; @@ -24,6 +25,9 @@ class Module; class Type; class AMDGPUTargetStreamer : public MCTargetStreamer { +protected: + MCContext &getContext() const { return Streamer.getContext(); } + public: AMDGPUTargetStreamer(MCStreamer &S); virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, @@ -42,7 +46,9 @@ public: 
virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; - virtual void emitRuntimeMetadata(Module &M) = 0; + virtual void EmitRuntimeMetadata(Module &M) = 0; + + virtual void EmitRuntimeMetadata(StringRef Metadata) = 0; }; class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer { @@ -64,12 +70,18 @@ public: void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - void emitRuntimeMetadata(Module &M) override {} + void EmitRuntimeMetadata(Module &M) override; + + void EmitRuntimeMetadata(StringRef Metadata) override; }; class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer { MCStreamer &Streamer; + void EmitAMDGPUNote(const MCExpr* DescSize, + AMDGPU::PT_NOTE::NoteType Type, + std::function EmitDesc); + public: AMDGPUTargetELFStreamer(MCStreamer &S); @@ -90,7 +102,9 @@ public: void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - void emitRuntimeMetadata(Module &M) override; + void EmitRuntimeMetadata(Module &M) override; + + void EmitRuntimeMetadata(StringRef Metadata) override; }; } diff --git a/llvm/test/MC/AMDGPU/hsa.s b/llvm/test/MC/AMDGPU/hsa.s index b95a790..b428c81 100644 --- a/llvm/test/MC/AMDGPU/hsa.s +++ b/llvm/test/MC/AMDGPU/hsa.s @@ -14,6 +14,8 @@ // ELF: 0020: 03000000 414D4400 04000700 07000000 // ELF: 0030: 00000000 00000000 414D4400 414D4447 // ELF: 0040: 50550000 +// We can't check binary representation of metadata note: it is different on +// Windows and Linux because of carriage return on Windows // ELF: Symbol { // ELF: Name: amd_kernel_code_t_minimal @@ -35,10 +37,29 @@ .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" // ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" +.amdgpu_runtime_metadata + { + amd.MDVersion: [ 2, 0 ] + amd.Kernels: [ + { amd.KernelName: amd_kernel_code_t_test_all }, + { amd.KernelName: amd_kernel_code_t_minimal } + ] + } +.end_amdgpu_runtime_metadata + +// ASM: .amdgpu_runtime_metadata +// ASM: { +// ASM: amd.MDVersion: [ 2, 0 ] +// ASM: amd.Kernels: [ +// ASM: { 
amd.KernelName: amd_kernel_code_t_test_all }, +// ASM: { amd.KernelName: amd_kernel_code_t_minimal } +// ASM: ] +// ASM: } +// ASM: .end_amdgpu_runtime_metadata + .amdgpu_hsa_kernel amd_kernel_code_t_test_all .amdgpu_hsa_kernel amd_kernel_code_t_minimal - amd_kernel_code_t_test_all: ; Test all amd_kernel_code_t members with non-default values. .amd_kernel_code_t diff --git a/llvm/test/MC/AMDGPU/metadata.s b/llvm/test/MC/AMDGPU/metadata.s new file mode 100644 index 0000000..3c009ff5 --- /dev/null +++ b/llvm/test/MC/AMDGPU/metadata.s @@ -0,0 +1,35 @@ +// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM + +.amdgpu_runtime_metadata + { amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: + + - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: + - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } + + - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: + - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } + } +.end_amdgpu_runtime_metadata + +// ASM: { amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', 
'2:1:8:%g\n' ], amd.Kernels: +// ASM: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: +// ASM: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } +// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } +// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } +// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } +// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } +// ASM: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: +// ASM: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 } +// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } +// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } +// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } +// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } +// ASM: } -- 2.7.4