return MCRegister();
}
-static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
- LivePhysRegs LiveRegs;
- LiveRegs.init(*MRI.getTargetRegisterInfo());
- return findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
+static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
+ LivePhysRegs &LiveRegs,
+ Register &TempSGPR,
+ Optional<int> &FrameIndex,
+ bool IsFP) {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+
+#ifndef NDEBUG
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+#endif
+
+ // We need to save and restore the current FP/BP.
+
+ // 1: If there is already a VGPR with free lanes, use it. We
+ // may already have to pay the penalty for spilling a CSR VGPR.
+ if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
+ int NewFI = FrameInfo.CreateStackObject(4, 4, true, nullptr,
+ TargetStackID::SGPRSpill);
+
+ if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
+ llvm_unreachable("allocate SGPR spill should have worked");
+
+ FrameIndex = NewFI;
+
+ LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
+ dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
+ << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
+ << '\n');
+ return;
+ }
+
+ // 2: Next, try to save the FP/BP in an unused SGPR.
+ TempSGPR = findScratchNonCalleeSaveRegister(
+ MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
+
+ if (!TempSGPR) {
+ int NewFI = FrameInfo.CreateStackObject(4, 4, true, nullptr,
+ TargetStackID::SGPRSpill);
+
+ if (MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
+ // 3: There's no free lane to spill, and no free register to save FP/BP,
+ // so we're forced to spill another VGPR to use for the spill.
+ FrameIndex = NewFI;
+ } else {
+ // 4: If all else fails, spill the FP/BP to memory.
+ FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
+ }
+
+ LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
+ dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
+ << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
+ << '\n';);
+ } else {
+ LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
+ << printReg(TempSGPR, TRI) << '\n');
+ }
}
// We need to specially emit stack operations here because a different frame
LiveRegs.addLiveIns(MBB);
if (FuncInfo->SGPRForFPSaveRestoreCopy)
LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+
+ if (FuncInfo->SGPRForBPSaveRestoreCopy)
+ LiveRegs.removeReg(FuncInfo->SGPRForBPSaveRestoreCopy);
} else {
// In epilog.
LiveRegs.init(*ST.getRegisterInfo());
Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
+ Register BasePtrReg =
+ TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
LivePhysRegs LiveRegs;
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc DL;
bool HasFP = false;
+ bool HasBP = false;
uint32_t NumBytes = MFI.getStackSize();
uint32_t RoundedSize = NumBytes;
// To avoid clobbering VGPRs in lanes that weren't active on function entry,
TargetStackID::SGPRSpill;
}
+ bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue();
+ bool SpillBPToMemory = false;
+ // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
+ // Otherwise we are spilling the BP to memory.
+ if (HasBPSaveIndex) {
+ SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) !=
+ TargetStackID::SGPRSpill;
+ }
+
// Emit the copy if we need an FP, and are using a free SGPR to save it.
if (FuncInfo->SGPRForFPSaveRestoreCopy) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
.addReg(FramePtrReg)
.setMIFlag(MachineInstr::FrameSetup);
- // Make the register live throughout the function.
- for (MachineBasicBlock &MBB : MF)
- MBB.addLiveIn(FuncInfo->SGPRForFPSaveRestoreCopy);
+ }
+
+ // Emit the copy if we need a BP, and are using a free SGPR to save it.
+ if (FuncInfo->SGPRForBPSaveRestoreCopy) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
+ FuncInfo->SGPRForBPSaveRestoreCopy)
+ .addReg(BasePtrReg)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+  // If a copy has been emitted for FP and/or BP, make the SGPRs
+ // used in the copy instructions live throughout the function.
+ SmallVector<MCPhysReg, 2> TempSGPRs;
+ if (FuncInfo->SGPRForFPSaveRestoreCopy)
+ TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
+
+ if (FuncInfo->SGPRForBPSaveRestoreCopy)
+ TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
+
+ if (!TempSGPRs.empty()) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MCPhysReg Reg : TempSGPRs)
+ MBB.addLiveIn(Reg);
+
+ MBB.sortUniqueLiveIns();
+ }
}
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
FuncInfo->FramePointerSaveIndex.getValue());
}
+ if (HasBPSaveIndex && SpillBPToMemory) {
+ assert(!MFI.isDeadObjectIndex(*FuncInfo->BasePointerSaveIndex));
+
+ if (!ScratchExecCopy)
+ ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
+
+ MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
+ MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
+ .addReg(BasePtrReg);
+
+ buildPrologSpill(LiveRegs, MBB, MBBI, TII, TmpVGPR,
+ FuncInfo->getScratchRSrcReg(), StackPtrReg,
+ *FuncInfo->BasePointerSaveIndex);
+ }
+
if (ScratchExecCopy) {
// FIXME: Split block and make terminator.
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
.addReg(Spill[0].VGPR, RegState::Undef);
}
+ // In this case, spill the BP to a reserved VGPR.
+ if (HasBPSaveIndex && !SpillBPToMemory) {
+ const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
+ assert(!MFI.isDeadObjectIndex(BasePtrFI));
+
+ assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
+ FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
+ assert(Spill.size() == 1);
+
+ // Save BP before setting it up.
+ // FIXME: This should respect spillSGPRToVGPR;
+ BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
+ Spill[0].VGPR)
+ .addReg(BasePtrReg)
+ .addImm(Spill[0].Lane)
+ .addReg(Spill[0].VGPR, RegState::Undef);
+ }
+
if (TRI.needsStackRealignment(MF)) {
HasFP = true;
const unsigned Alignment = MFI.getMaxAlign().value();
LiveRegs.init(TRI);
LiveRegs.addLiveIns(MBB);
LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+ LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
}
Register ScratchSPReg = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
- assert(ScratchSPReg && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);
+ assert(ScratchSPReg && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy &&
+ ScratchSPReg != FuncInfo->SGPRForBPSaveRestoreCopy);
// s_add_u32 tmp_reg, s32, NumBytes
// s_and_b32 s32, tmp_reg, 0b111...0000
.setMIFlag(MachineInstr::FrameSetup);
FuncInfo->setIsStackRealigned(true);
} else if ((HasFP = hasFP(MF))) {
- // If we need a base pointer, set it up here. It's whatever the value of
- // the stack pointer is at this point. Any variable size objects will be
- // allocated after this, so we can still use the base pointer to reference
- // locals.
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
.addReg(StackPtrReg)
.setMIFlag(MachineInstr::FrameSetup);
}
+ // If we need a base pointer, set it up here. It's whatever the value of
+ // the stack pointer is at this point. Any variable size objects will be
+ // allocated after this, so we can still use the base pointer to reference
+ // the incoming arguments.
+ if ((HasBP = TRI.hasBasePointer(MF))) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
+ .addReg(StackPtrReg)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
if (HasFP && RoundedSize != 0) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
.addReg(StackPtrReg)
assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
!FuncInfo->FramePointerSaveIndex)) &&
"Saved FP but didn't need it");
+
+ assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
+ FuncInfo->BasePointerSaveIndex)) &&
+ "Needed to save BP but didn't save it anywhere");
+
+ assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
+ !FuncInfo->BasePointerSaveIndex)) &&
+ "Saved BP but didn't need it");
}
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
LivePhysRegs LiveRegs;
DebugLoc DL;
: NumBytes;
const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
+ const Register BasePtrReg =
+ TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
bool SpillFPToMemory = false;
TargetStackID::SGPRSpill;
}
+ bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue();
+ bool SpillBPToMemory = false;
+ if (HasBPSaveIndex) {
+ SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) !=
+ TargetStackID::SGPRSpill;
+ }
+
if (RoundedSize != 0 && hasFP(MF)) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
.addReg(StackPtrReg)
.setMIFlag(MachineInstr::FrameSetup);
}
+ if (FuncInfo->SGPRForBPSaveRestoreCopy) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
+ .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
Register ScratchExecCopy;
if (HasFPSaveIndex) {
const int FI = FuncInfo->FramePointerSaveIndex.getValue();
}
}
+ if (HasBPSaveIndex) {
+ const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
+ assert(!MFI.isDeadObjectIndex(BasePtrFI));
+ if (SpillBPToMemory) {
+ if (!ScratchExecCopy)
+ ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
+
+ MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
+ MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+ buildEpilogReload(LiveRegs, MBB, MBBI, TII, TempVGPR,
+ FuncInfo->getScratchRSrcReg(), StackPtrReg, BasePtrFI);
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
+ .addReg(TempVGPR, RegState::Kill);
+ } else {
+ // Reload from VGPR spill.
+ assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
+ FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
+ assert(Spill.size() == 1);
+ BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+ BasePtrReg)
+ .addReg(Spill[0].VGPR)
+ .addImm(Spill[0].Lane);
+ }
+ }
+
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg :
FuncInfo->getSGPRSpillVGPRs()) {
if (!Reg.FI.hasValue())
#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
- Optional<int> FramePointerSaveIndex) {
+ Optional<int> FramePointerSaveIndex,
+ Optional<int> BasePointerSaveIndex) {
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
I != E; ++I) {
if (!MFI.isDeadObjectIndex(I) &&
MFI.getStackID(I) == TargetStackID::SGPRSpill &&
- FramePointerSaveIndex && I != FramePointerSaveIndex) {
+ ((FramePointerSaveIndex && I != FramePointerSaveIndex) ||
+ (BasePointerSaveIndex && I != BasePointerSaveIndex))) {
return false;
}
}
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
FuncInfo->removeDeadFrameIndices(MFI);
- assert(allSGPRSpillsAreDead(MFI, None) &&
+ assert(allSGPRSpillsAreDead(MFI, None, None) &&
"SGPR spill should have been removed in SILowerSGPRSpills");
// FIXME: The other checks should be redundant with allStackObjectsAreDead,
for (auto SSpill : MFI->getSGPRSpillVGPRs())
SavedVGPRs.reset(SSpill.VGPR);
- const bool HasFP = WillHaveFP || hasFP(MF);
- if (!HasFP)
- return;
-
- // We need to save and restore the current FP.
-
- // 1: If there is already a VGPR with free lanes, use it. We
- // may already have to pay the penalty for spilling a CSR VGPR.
- if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
- int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
- TargetStackID::SGPRSpill);
-
- if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
- llvm_unreachable("allocate SGPR spill should have worked");
-
- MFI->FramePointerSaveIndex = NewFI;
+ LivePhysRegs LiveRegs;
+ LiveRegs.init(*TRI);
- LLVM_DEBUG(
- auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
- dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI)
- << ':' << Spill.Lane << '\n');
- return;
+ if (WillHaveFP || hasFP(MF)) {
+ getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
+ MFI->FramePointerSaveIndex, true);
}
- // 2: Next, try to save the FP in an unused SGPR.
- MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());
-
- if (!MFI->SGPRForFPSaveRestoreCopy) {
- int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
- TargetStackID::SGPRSpill);
-
- if (MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
- // 3: There's no free lane to spill, and no free register to save FP, so
- // we're forced to spill another VGPR to use for the spill.
- MFI->FramePointerSaveIndex = NewFI;
- } else {
- // 4: If all else fails, spill the FP to memory.
- MFI->FramePointerSaveIndex =
- FrameInfo.CreateSpillStackObject(4, Align(4));
- }
-
- LLVM_DEBUG(
- auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
- dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
- << ':' << Spill.Lane << '\n';);
- } else {
- LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
- printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
+ if (TRI->hasBasePointer(MF)) {
+ if (MFI->SGPRForFPSaveRestoreCopy)
+ LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
+ getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
+ MFI->BasePointerSaveIndex, false);
}
}
return true; // Early exit if no callee saved registers are modified!
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- if (!FuncInfo->SGPRForFPSaveRestoreCopy)
+ if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
+ !FuncInfo->SGPRForBPSaveRestoreCopy)
return false;
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *RI = ST.getRegisterInfo();
+ Register FramePtrReg = FuncInfo->getFrameOffsetReg();
+ Register BasePtrReg = RI->getBaseRegister();
+ unsigned NumModifiedRegs = 0;
+
+ if (FuncInfo->SGPRForFPSaveRestoreCopy)
+ NumModifiedRegs++;
+ if (FuncInfo->SGPRForBPSaveRestoreCopy)
+ NumModifiedRegs++;
+
for (auto &CS : CSI) {
- if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
- if (FuncInfo->SGPRForFPSaveRestoreCopy)
- CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
- break;
+ if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
+ CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+ if (--NumModifiedRegs)
+ break;
+ } else if (CS.getReg() == BasePtrReg &&
+ FuncInfo->SGPRForBPSaveRestoreCopy) {
+ CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
+ if (--NumModifiedRegs)
+ break;
}
}
ret void
}
+declare void @extern_func(<32 x i32>, i32) #0
+define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 {
+; The test forces the stack to be realigned to a new boundary
+; since there is a local object with an alignment of 1024.
+; Should use BP to access the incoming stack arguments.
+; The BP value is saved/restored with a VGPR spill.
+
+; GCN-LABEL: func_call_align1024_bp_gets_vgpr_spill:
+; GCN: buffer_store_dword [[VGPR_REG:v[0-9]+]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
+; GCN-NEXT: v_writelane_b32 [[VGPR_REG]], s33, 2
+; GCN-NEXT: v_writelane_b32 [[VGPR_REG]], s34, 3
+; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0
+; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000
+
+; GCN: s_mov_b32 s34, s32
+; GCN-NEXT: v_mov_b32_e32 v32, 0
+
+; GCN: buffer_store_dword v32, off, s[0:3], s33 offset:1024
+; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s34
+; GCN-NEXT: s_add_u32 s32, s32, 0x30000
+
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+
+; GCN: v_readlane_b32 s33, [[VGPR_REG]], 2
+; GCN-NEXT: s_sub_u32 s32, s32, 0x30000
+; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG]], 3
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword [[VGPR_REG]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+ %temp = alloca i32, align 1024, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %temp, align 1024
+ call void @extern_func(<32 x i32> %a, i32 %b)
+ ret void
+}
+
+%struct.Data = type { [9 x i32] }
+define i32 @needs_align1024_stack_args_used_inside_loop(%struct.Data addrspace(5)* nocapture readonly byval(%struct.Data) align 8 %arg) local_unnamed_addr #4 {
+; The local object allocation needed an alignment of 1024.
+; Since the function argument is accessed in a loop with an
+; index variable, the base pointer first gets loaded into a VGPR
+; and that value should be further referenced to load the incoming values.
+; The BP value will get saved/restored in an SGPR at the prologue/epilogue.
+
+; GCN-LABEL: needs_align1024_stack_args_used_inside_loop:
+; GCN: s_mov_b32 [[BP_COPY:s[0-9]+]], s34
+; GCN-NEXT: s_mov_b32 s34, s32
+; GCN-NEXT: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0
+; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
+; GCN-NEXT: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000
+; GCN-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN-NEXT: v_lshrrev_b32_e64 [[VGPR_REG:v[0-9]+]], 6, s34
+; GCN: s_add_u32 s32, s32, 0x30000
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:1024
+; GCN: buffer_load_dword v{{[0-9]+}}, [[VGPR_REG]], s[0:3], 0 offen
+; GCN: v_add_u32_e32 [[VGPR_REG]], vcc, 4, [[VGPR_REG]]
+; GCN: s_sub_u32 s32, s32, 0x30000
+; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
+; GCN-NEXT: s_mov_b32 s34, [[BP_COPY]]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+begin:
+ %local_var = alloca i32, align 1024, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %local_var, align 1024
+ br label %loop_body
+
+loop_end: ; preds = %loop_body
+ %idx_next = add nuw nsw i32 %lp_idx, 1
+ %lp_exit_cond = icmp eq i32 %idx_next, 9
+ br i1 %lp_exit_cond, label %exit, label %loop_body
+
+loop_body: ; preds = %loop_end, %begin
+ %lp_idx = phi i32 [ 0, %begin ], [ %idx_next, %loop_end ]
+ %ptr = getelementptr inbounds %struct.Data, %struct.Data addrspace(5)* %arg, i32 0, i32 0, i32 %lp_idx
+ %val = load i32, i32 addrspace(5)* %ptr, align 8
+ %lp_cond = icmp eq i32 %val, %lp_idx
+ br i1 %lp_cond, label %loop_end, label %exit
+
+exit: ; preds = %loop_end, %loop_body
+ %out = phi i32 [ 0, %loop_body ], [ 1, %loop_end ]
+ ret i32 %out
+}
+
+define void @no_free_scratch_sgpr_for_bp_copy(<32 x i32> %a, i32 %b) #0 {
+; GCN-LABEL: no_free_scratch_sgpr_for_bp_copy:
+; GCN: ; %bb.0:
+; GCN: v_writelane_b32 [[VGPR_REG:v[0-9]+]], s34, 0
+; GCN-NEXT: s_mov_b32 s34, s32
+; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s34
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:128
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN: v_readlane_b32 s34, [[VGPR_REG:v[0-9]+]], 0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %local_val = alloca i32, align 128, addrspace(5)
+ store volatile i32 %b, i32 addrspace(5)* %local_val, align 128
+ ; Use all clobberable registers, so BP has to spill to a VGPR.
+ call void asm sideeffect "",
+ "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
+ ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
+ ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
+ ,~{vcc_hi}"() #0
+ ret void
+}
+
attributes #0 = { noinline nounwind }
attributes #1 = { noinline nounwind "stackrealign" }
attributes #2 = { noinline nounwind alignstack=4 }
attributes #3 = { noinline nounwind "no-realign-stack" }
+attributes #4 = { noinline nounwind "frame-pointer"="all"}