From 1de4e5693317c1e34cae4740d6c9c6733e5ed3fd Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Tue, 17 Mar 2020 13:14:24 +0100 Subject: [PATCH] [AMDGPU] Don't mark the .note section as ALLOC Marking a section as ALLOC tells the ELF loader to load the section into memory. As we do not want to load the notes into VRAM, the flag should not be there. On AMDHSA, .note is still marked as ALLOC; apparently this is currently needed for OpenCL (see https://reviews.llvm.org/D74995). Differential Revision: https://reviews.llvm.org/D76278 --- .../AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 16 +++++++---- .../AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h | 1 + llvm/test/CodeGen/AMDGPU/amdpal-elf.ll | 33 ++++++++++++++++++++++ 3 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/amdpal-elf.ll diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 362952f..a699cd3 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -393,9 +393,9 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( // AMDGPUTargetELFStreamer //===----------------------------------------------------------------------===// -AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer( - MCStreamer &S, const MCSubtargetInfo &STI) - : AMDGPUTargetStreamer(S), Streamer(S) { +AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S, + const MCSubtargetInfo &STI) + : AMDGPUTargetStreamer(S), Streamer(S), Os(STI.getTargetTriple().getOS()) { MCAssembler &MCA = getStreamer().getAssembler(); unsigned EFlags = MCA.getELFHeaderEFlags(); @@ -438,9 +438,15 @@ void AMDGPUTargetELFStreamer::EmitNote( auto NameSZ = Name.size() + 1; + unsigned NoteFlags = 0; + // TODO Apparently, this is currently needed for OpenCL as mentioned in + // https://reviews.llvm.org/D74995 + if (Os == Triple::AMDHSA) + NoteFlags 
= ELF::SHF_ALLOC; + S.PushSection(); - S.SwitchSection(Context.getELFSection( - ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); + S.SwitchSection( + Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags)); S.emitInt32(NameSZ); // namesz S.emitValue(DescSZ, 4); // descz S.emitInt32(NoteType); // type diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 683b3e3..5f89da3 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -133,6 +133,7 @@ public: class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { MCStreamer &Streamer; + Triple::OSType Os; void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType, function_ref<void(MCELFStreamer &)> EmitDesc); diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll b/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll new file mode 100644 index 0000000..412815f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=kaveri -filetype=obj -mattr=-code-object-v3 | llvm-readobj -symbols -s -sd | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=kaveri -mattr=-code-object-v3 | llvm-mc -filetype=obj -triple amdgcn--amdpal -mcpu=kaveri -mattr=-code-object-v3 | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64,-code-object-v3 | FileCheck --check-prefix=GFX10-W32 %s +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 | FileCheck --check-prefix=GFX10-W64 %s + +; ELF: Section { +; ELF: Name: .text +; ELF: Type: SHT_PROGBITS (0x1) +; ELF: Flags [ (0x6) +; ELF: SHF_ALLOC (0x2) +; ELF: SHF_EXECINSTR (0x4) +; ELF: } + +; ELF: SHT_NOTE +; ELF: Flags [ (0x0) +; ELF: ] + +; ELF: Symbol { +; ELF: Name: simple 
+; ELF: Size: 36 +; ELF: Section: .text (0x2) +; ELF: } + +; GFX10-W32: NumSGPRsForWavesPerEU: 4 +; GFX10-W32: NumVGPRsForWavesPerEU: 3 +; GFX10-W64: NumSGPRsForWavesPerEU: 2 +; GFX10-W64: NumVGPRsForWavesPerEU: 3 + +define amdgpu_kernel void @simple(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} -- 2.7.4