From: Konstantin Zhuravlyov Date: Tue, 28 Nov 2017 17:51:08 +0000 (+0000) Subject: AMDGPU: Add num spilled s/vgprs to metadata X-Git-Tag: llvmorg-6.0.0-rc1~2493 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=06ae4ec78ed189a06a0e376fb57abfeff3f7aad6;p=platform%2Fupstream%2Fllvm.git AMDGPU: Add num spilled s/vgprs to metadata This was requested by tools. Differential Revision: https://reviews.llvm.org/D40321 llvm-svn: 319192 --- diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 1cf3030..ecb0c11 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1295,6 +1295,16 @@ non-AMD key names should be prefixed by "*vendor-name*.". code is capable of supporting XNACK. See :ref:`amdgpu-target-features`. + "NumSpilledSGPRs" integer Number of stores from + a scalar register to + a register allocator + created spill + location. + "NumSpilledVGPRs" integer Number of stores from + a vector register to + a register allocator + created spill + location. ============================ ============== ========= ===================== .. diff --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h index 0c8d022..00039a7 100644 --- a/llvm/include/llvm/Support/AMDGPUMetadata.h +++ b/llvm/include/llvm/Support/AMDGPUMetadata.h @@ -244,6 +244,10 @@ constexpr char MaxFlatWorkGroupSize[] = "MaxFlatWorkGroupSize"; constexpr char IsDynamicCallStack[] = "IsDynamicCallStack"; /// \brief Key for Kernel::CodeProps::Metadata::mIsXNACKEnabled. constexpr char IsXNACKEnabled[] = "IsXNACKEnabled"; +/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledSGPRs. +constexpr char NumSpilledSGPRs[] = "NumSpilledSGPRs"; +/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledVGPRs. +constexpr char NumSpilledVGPRs[] = "NumSpilledVGPRs"; } // end namespace Key /// \brief In-memory representation of kernel code properties metadata. @@ -275,6 +279,10 @@ struct Metadata final { /// \brief True if the generated machine code is capable of supporting XNACK. /// Optional. bool mIsXNACKEnabled = false; + /// \brief Number of SGPRs spilled by a wavefront. Optional. + uint16_t mNumSpilledSGPRs = 0; + /// \brief Number of VGPRs spilled by a workitem. Optional. + uint16_t mNumSpilledVGPRs = 0; /// \brief Default constructor. Metadata() = default; diff --git a/llvm/lib/Support/AMDGPUMetadata.cpp b/llvm/lib/Support/AMDGPUMetadata.cpp index ec2714c..ddb25935 100644 --- a/llvm/lib/Support/AMDGPUMetadata.cpp +++ b/llvm/lib/Support/AMDGPUMetadata.cpp @@ -148,6 +148,10 @@ struct MappingTraits { MD.mIsDynamicCallStack, false); YIO.mapOptional(Kernel::CodeProps::Key::IsXNACKEnabled, MD.mIsXNACKEnabled, false); + YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledSGPRs, + MD.mNumSpilledSGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledVGPRs, + MD.mNumSpilledVGPRs, uint16_t(0)); } }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 563ca0d..9c87eca 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1188,6 +1188,8 @@ AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps( HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); + HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); + HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); return HSACodeProps; } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll index 2d02b46..f4a914a 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll @@ -1,26 +1,26 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +@var = addrspace(1) global float 0.0 + ; CHECK: --- ; CHECK: Version: [ 1, 0 ] - ; CHECK: Kernels: -; CHECK: - Name: test -; CHECK: SymbolName: 'test@kd' -; CHECK: CodeProps: -; CHECK: KernargSegmentSize: 24 -; CHECK: GroupSegmentFixedSize: 0 -; CHECK: PrivateSegmentFixedSize: 0 -; CHECK: KernargSegmentAlign: 8 -; CHECK: WavefrontSize: 64 -; GFX700: NumSGPRs: 6 -; GFX800: NumSGPRs: 96 -; GFX900: NumSGPRs: 6 -; GFX700: NumVGPRs: 4 -; GFX800: NumVGPRs: 6 -; GFX900: NumVGPRs: 6 -; CHECK: MaxFlatWorkGroupSize: 256 + +; CHECK: - Name: test +; CHECK: SymbolName: 'test@kd' +; CHECK: CodeProps: +; CHECK: KernargSegmentSize: 24 +; CHECK: GroupSegmentFixedSize: 0 +; CHECK: PrivateSegmentFixedSize: 0 +; CHECK: KernargSegmentAlign: 8 +; CHECK: WavefrontSize: 64 +; CHECK: NumSGPRs: 6 +; GFX700: NumVGPRs: 4 +; GFX803: NumVGPRs: 6 +; GFX900: NumVGPRs: 6 +; CHECK: MaxFlatWorkGroupSize: 256 define amdgpu_kernel void @test( half addrspace(1)* %r, half addrspace(1)* %a, @@ -32,3 +32,111 @@ entry: store half %r.val, half addrspace(1)* %r ret void } + +; CHECK: - Name: num_spilled_sgprs +; CHECK: SymbolName: 'num_spilled_sgprs@kd' +; CHECK: CodeProps: +; CHECK: NumSpilledSGPRs: 41 +define amdgpu_kernel void @num_spilled_sgprs( + i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %out2, + i32 addrspace(1)* %out3, i32 addrspace(1)* %out4, i32 addrspace(1)* %out5, + i32 addrspace(1)* %out6, i32 addrspace(1)* %out7, i32 addrspace(1)* %out8, + i32 addrspace(1)* %out9, i32 addrspace(1)* %outa, i32 addrspace(1)* %outb, + i32 addrspace(1)* %outc, i32 addrspace(1)* %outd, i32 addrspace(1)* %oute, + i32 addrspace(1)* %outf, i32 %in0, i32 %in1, i32 %in2, i32 %in3, i32 %in4, + i32 %in5, i32 %in6, i32 %in7, i32 %in8, i32 %in9, i32 %ina, i32 %inb, + i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 { +entry: + store i32 %in0, i32 addrspace(1)* %out0 + store i32 %in1, i32 addrspace(1)* %out1 + store i32 %in2, i32 addrspace(1)* %out2 + store i32 %in3, i32 addrspace(1)* %out3 + store i32 %in4, i32 addrspace(1)* %out4 + store i32 %in5, i32 addrspace(1)* %out5 + store i32 %in6, i32 addrspace(1)* %out6 + store i32 %in7, i32 addrspace(1)* %out7 + store i32 %in8, i32 addrspace(1)* %out8 + store i32 %in9, i32 addrspace(1)* %out9 + store i32 %ina, i32 addrspace(1)* %outa + store i32 %inb, i32 addrspace(1)* %outb + store i32 %inc, i32 addrspace(1)* %outc + store i32 %ind, i32 addrspace(1)* %outd + store i32 %ine, i32 addrspace(1)* %oute + store i32 %inf, i32 addrspace(1)* %outf + ret void +} + +; CHECK: - Name: num_spilled_vgprs +; CHECK: SymbolName: 'num_spilled_vgprs@kd' +; CHECK: CodeProps: +; CHECK: NumSpilledVGPRs: 14 +define amdgpu_kernel void @num_spilled_vgprs() #1 { + %val0 = load volatile float, float addrspace(1)* @var + %val1 = load volatile float, float addrspace(1)* @var + %val2 = load volatile float, float addrspace(1)* @var + %val3 = load volatile float, float addrspace(1)* @var + %val4 = load volatile float, float addrspace(1)* @var + %val5 = load volatile float, float addrspace(1)* @var + %val6 = load volatile float, float addrspace(1)* @var + %val7 = load volatile float, float addrspace(1)* @var + %val8 = load volatile float, float addrspace(1)* @var + %val9 = load volatile float, float addrspace(1)* @var + %val10 = load volatile float, float addrspace(1)* @var + %val11 = load volatile float, float addrspace(1)* @var + %val12 = load volatile float, float addrspace(1)* @var + %val13 = load volatile float, float addrspace(1)* @var + %val14 = load volatile float, float addrspace(1)* @var + %val15 = load volatile float, float addrspace(1)* @var + %val16 = load volatile float, float addrspace(1)* @var + %val17 = load volatile float, float addrspace(1)* @var + %val18 = load volatile float, float addrspace(1)* @var + %val19 = load volatile float, float addrspace(1)* @var + %val20 = load volatile float, float addrspace(1)* @var + %val21 = load volatile float, float addrspace(1)* @var + %val22 = load volatile float, float addrspace(1)* @var + %val23 = load volatile float, float addrspace(1)* @var + %val24 = load volatile float, float addrspace(1)* @var + %val25 = load volatile float, float addrspace(1)* @var + %val26 = load volatile float, float addrspace(1)* @var + %val27 = load volatile float, float addrspace(1)* @var + %val28 = load volatile float, float addrspace(1)* @var + %val29 = load volatile float, float addrspace(1)* @var + %val30 = load volatile float, float addrspace(1)* @var + + store volatile float %val0, float addrspace(1)* @var + store volatile float %val1, float addrspace(1)* @var + store volatile float %val2, float addrspace(1)* @var + store volatile float %val3, float addrspace(1)* @var + store volatile float %val4, float addrspace(1)* @var + store volatile float %val5, float addrspace(1)* @var + store volatile float %val6, float addrspace(1)* @var + store volatile float %val7, float addrspace(1)* @var + store volatile float %val8, float addrspace(1)* @var + store volatile float %val9, float addrspace(1)* @var + store volatile float %val10, float addrspace(1)* @var + store volatile float %val11, float addrspace(1)* @var + store volatile float %val12, float addrspace(1)* @var + store volatile float %val13, float addrspace(1)* @var + store volatile float %val14, float addrspace(1)* @var + store volatile float %val15, float addrspace(1)* @var + store volatile float %val16, float addrspace(1)* @var + store volatile float %val17, float addrspace(1)* @var + store volatile float %val18, float addrspace(1)* @var + store volatile float %val19, float addrspace(1)* @var + store volatile float %val20, float addrspace(1)* @var + store volatile float %val21, float addrspace(1)* @var + store volatile float %val22, float addrspace(1)* @var + store volatile float %val23, float addrspace(1)* @var + store volatile float %val24, float addrspace(1)* @var + store volatile float %val25, float addrspace(1)* @var + store volatile float %val26, float addrspace(1)* @var + store volatile float %val27, float addrspace(1)* @var + store volatile float %val28, float addrspace(1)* @var + store volatile float %val29, float addrspace(1)* @var + store volatile float %val30, float addrspace(1)* @var + + ret void +} + +attributes #0 = { "amdgpu-num-sgpr"="14" } +attributes #1 = { "amdgpu-num-vgpr"="20" } diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s index 54c4b4a..0b04042 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s @@ -14,6 +14,8 @@ // CHECK: KernargSegmentAlign: 16 // CHECK: WavefrontSize: 64 // CHECK: MaxFlatWorkGroupSize: 256 +// CHECK: NumSpilledSGPRs: 1 +// CHECK: NumSpilledVGPRs: 1 .amd_amdgpu_hsa_metadata Version: [ 1, 0 ] Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] @@ -27,4 +29,6 @@ KernargSegmentAlign: 16 WavefrontSize: 64 MaxFlatWorkGroupSize: 256 + NumSpilledSGPRs: 1 + NumSpilledVGPRs: 1 .end_amd_amdgpu_hsa_metadata