if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
+ assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
// We are only allowed to create one new instruction when spilling
// registers, so we need to use pseudo instruction for spilling SGPRs.
if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
+ assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
// FIXME: Maybe this should not include a memoperand because it will be
// lowered to non-memory instructions.
}
bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); }
+
+MachineInstr *SIInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
+ VirtRegMap *VRM) const {
+ // This is a bit of a hack (copied from AArch64). Consider this instruction:
+ //
+ // %0:sreg_32 = COPY $m0
+ //
+ // We explicitly chose SReg_32 for the virtual register so such a copy might
+ // be eliminated by RegisterCoalescer. However, that may not be possible, and
+ // %0 may even spill. We can't spill $m0 normally (it would require copying to
+ // a numbered SGPR anyway), and since it is in the SReg_32 register class,
+ // TargetInstrInfo::foldMemoryOperand() is going to try.
+ //
+ // To prevent that, constrain the %0 register class here.
+ if (MI.isFullCopy()) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ if (DstReg == AMDGPU::M0 && SrcReg.isVirtual()) {
+ MF.getRegInfo().constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
+ return nullptr;
+ }
+
+ if (SrcReg == AMDGPU::M0 && DstReg.isVirtual()) {
+ MF.getRegInfo().constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
+ return nullptr;
+ }
+ }
+
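+  // Nothing else is folded. Note that returning nullptr by itself would not
+  // stop the generic COPY fold in TargetInstrInfo::foldMemoryOperand(); the
+  // constrainRegClass() calls above are what make that fallback reject $m0.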
+ return nullptr;
+}
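
For reviewers tracing why this is enough: TargetInstrInfo::foldMemoryOperand()
consults foldMemoryOperandImpl() first and then, for a plain COPY, falls back
to folding the spill or reload into a direct store/load of the physical
register on the other side of the COPY. A simplified paraphrase of that
fallback (condensed from memory of TargetInstrInfo.cpp; illustrative, not part
of this patch):

  // The target hook runs first. Here it only constrains %0 and returns
  // nullptr, so the generic COPY fallback below is reached.
  MachineInstr *NewMI =
      foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FI, LIS, VRM);
  if (NewMI)
    return NewMI;

  // Straight COPY may fold as load/store. canFoldCopy() (a helper in
  // TargetInstrInfo.cpp) succeeds only if the spilled virtual register's
  // class contains the physical register on the other side of the COPY.
  // Once %0 is constrained to SReg_32_XM0, that class no longer contains
  // $m0, so the fold is rejected and %0 is spilled/reloaded normally.
  if (!MI.isCopy() || Ops.size() != 1)
    return nullptr;
  const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
  if (!RC)
    return nullptr;
  // Otherwise $m0 itself would be handed to storeRegToStackSlot() /
  // loadRegFromStackSlot() -- exactly what the new asserts forbid.
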
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -stress-regalloc=2 -start-before=greedy -stop-after=virtregmap -o - %s | FileCheck %s
+
+# Test that spilling a copy from m0 is not folded into a direct spill of m0.
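+# Without the foldMemoryOperandImpl() change, foldMemoryOperand() would fold
+# the spill into the COPY and call storeRegToStackSlot() on $m0 itself,
+# tripping the new "m0 should not be spilled" assertion.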
+
+---
+
+name: merge_sgpr_spill_into_copy_from_m0
+tracksRegLiveness: true
+body: |
+ bb.0:
+
+ ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_m0
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: S_WAITCNT 0
+ ; CHECK: S_NOP 0, implicit-def $m0
+ ; CHECK: $sgpr0 = S_MOV_B32 $m0
+ ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0
+ ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0
+ ; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
+ ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0
+ ; CHECK: $m0 = S_MOV_B32 killed $sgpr0
+ ; CHECK: S_NOP 0
+ ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
+ S_NOP 0, implicit-def $m0
+ %0:sreg_32 = COPY $m0
+ S_NOP 0, implicit-def %1:sreg_32, implicit-def %2:sreg_32, implicit %0
+ $m0 = COPY %0
+ S_SENDMSG 0, implicit $m0, implicit $exec
+
+...
+
+# Test that reloading a register that is copied into m0 is not folded into a direct reload into m0.
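+# Without the foldMemoryOperandImpl() change, the reload would be folded into
+# the COPY and loadRegFromStackSlot() would be asked to reload directly into
+# $m0, tripping the new "m0 should not be reloaded into" assertion.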
+
+---
+
+name: reload_sgpr_spill_into_copy_to_m0
+tracksRegLiveness: true
+body: |
+ bb.0:
+
+ ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_m0
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: S_WAITCNT 0
+ ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0
+ ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0
+ ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0
+ ; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
+ ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0
+ ; CHECK: $m0 = S_MOV_B32 killed $sgpr0
+ ; CHECK: S_NOP 0
+ ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
+ S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $m0
+ S_NOP 0, implicit %0, implicit-def %3:sreg_32, implicit-def %4:sreg_32
+ $m0 = COPY %0
+ S_SENDMSG 0, implicit $m0, implicit $exec
+
+...