From 4d00dd2b93765964b4478bed3076479267acf8ac Mon Sep 17 00:00:00 2001 From: Marek Olsak Date: Mon, 9 Mar 2015 15:48:09 +0000 Subject: [PATCH] R600/SI: Limit SGPRs to 80 on Tonga and Iceland This is a candidate for stable. llvm-svn: 231659 --- llvm/lib/Target/R600/AMDGPU.td | 5 +++++ llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp | 7 +++++++ llvm/lib/Target/R600/AMDGPUSubtarget.cpp | 2 +- llvm/lib/Target/R600/AMDGPUSubtarget.h | 9 +++++++++ llvm/lib/Target/R600/Processors.td | 8 ++++++-- llvm/lib/Target/R600/SIRegisterInfo.cpp | 17 +++++++++++++++++ llvm/test/CodeGen/R600/elf.ll | 9 ++++++--- 7 files changed, 51 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/R600/AMDGPU.td b/llvm/lib/Target/R600/AMDGPU.td index a7d48b3..e5d5ce2 100644 --- a/llvm/lib/Target/R600/AMDGPU.td +++ b/llvm/lib/Target/R600/AMDGPU.td @@ -103,6 +103,11 @@ def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", "true", "Enable spilling of VGPRs to scratch memory">; +def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", + "SGPRInitBug", + "true", + "VI SGPR initilization bug requiring a fixed SGPR allocation size">; + class SubtargetFeatureFetchLimit : SubtargetFeature <"fetch"#Value, "TexVTXClauseSize", diff --git a/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp index 92bc314..5e1b6a3 100644 --- a/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -339,6 +339,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.NumVGPR = MaxVGPR + 1; ProgInfo.NumSGPR = MaxSGPR + 1; + if (STM.hasSGPRInitBug()) { + if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) + llvm_unreachable("Too many SGPRs used with the SGPR init bug"); + + ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; + } + ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4; ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8; // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp index 70c8525..0ead652 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp @@ -70,7 +70,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS, CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), - EnableVGPRSpilling(false), + EnableVGPRSpilling(false), SGPRInitBug(false), FrameLowering(TargetFrameLowering::StackGrowsUp, 64 * 16, // Maximum stack alignment (long16) 0), diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.h b/llvm/lib/Target/R600/AMDGPUSubtarget.h index 1b0122c..403a3e4 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.h +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.h @@ -44,6 +44,10 @@ public: VOLCANIC_ISLANDS, }; + enum { + FIXED_SGPR_COUNT_FOR_INIT_BUG = 80 + }; + private: std::string DevName; bool Is64bit; @@ -66,6 +70,7 @@ private: bool CFALUBug; int LocalMemorySize; bool EnableVGPRSpilling; + bool SGPRInitBug; AMDGPUFrameLowering FrameLowering; std::unique_ptr TLInfo; @@ -206,6 +211,10 @@ public: return LocalMemorySize; } + bool hasSGPRInitBug() const { + return SGPRInitBug; + } + unsigned getAmdKernelCodeChipID() const; bool enableMachineScheduler() const override { diff --git a/llvm/lib/Target/R600/Processors.td b/llvm/lib/Target/R600/Processors.td index fb5aa61..82c6d13 100644 --- a/llvm/lib/Target/R600/Processors.td +++ b/llvm/lib/Target/R600/Processors.td @@ -119,8 +119,12 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>; // Volcanic Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"tonga", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; +def : ProcessorModel<"tonga", SIQuarterSpeedModel, + [FeatureVolcanicIslands, FeatureSGPRInitBug] +>; -def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; +def : ProcessorModel<"iceland", SIQuarterSpeedModel, + [FeatureVolcanicIslands, FeatureSGPRInitBug] +>; def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; diff --git a/llvm/lib/Target/R600/SIRegisterInfo.cpp b/llvm/lib/Target/R600/SIRegisterInfo.cpp index f3a4282..2571472 100644 --- a/llvm/lib/Target/R600/SIRegisterInfo.cpp +++ b/llvm/lib/Target/R600/SIRegisterInfo.cpp @@ -46,6 +46,23 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AMDGPU::VGPR255); Reserved.set(AMDGPU::VGPR254); + // Tonga and Iceland can only allocate a fixed number of SGPRs due + // to a hw bug. + if (ST.hasSGPRInitBug()) { + unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); + // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). + // Assume XNACK_MASK is unused. + unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4; + + for (unsigned i = Limit; i < NumSGPRs; ++i) { + unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); + MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true); + + for (; R.isValid(); ++R) + Reserved.set(*R); + } + } + return Reserved; } diff --git a/llvm/test/CodeGen/R600/elf.ll b/llvm/test/CodeGen/R600/elf.ll index aca3109..f801b3f 100644 --- a/llvm/test/CodeGen/R600/elf.ll +++ b/llvm/test/CodeGen/R600/elf.ll @@ -1,7 +1,9 @@ ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s +; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s ; ELF: Format: ELF32 ; ELF: Name: .AMDGPU.config @@ -15,7 +17,8 @@ ; CONFIG: test: ; CONFIG: .section .AMDGPU.config ; CONFIG-NEXT: .long 45096 -; CONFIG-NEXT: .long 0 +; TYPICAL-NEXT: .long 0 +; TONGA-NEXT: .long 576 define void @test(i32 %p) #0 { %i = add i32 %p, 2 %r = bitcast i32 %i to float -- 2.7.4