From dad553a5cf9d49493d64a8d55683338336f1a9f9 Mon Sep 17 00:00:00 2001 From: Marek Olsak Date: Fri, 25 Nov 2016 16:03:27 +0000 Subject: [PATCH] Revert "AMDGPU: Fix MMO when splitting spill" This reverts commit 79d4f8b8b1ce430c3d5dac4fc72a9eebaed24fe1. llvm-svn: 287935 --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 115 ++++++++------------- llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 11 +- .../CodeGen/AMDGPU/control-flow-fastregalloc.ll | 30 +++--- llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll | 16 +-- .../AMDGPU/vgpr-spill-emergency-stack-slot.ll | 4 +- 5 files changed, 72 insertions(+), 104 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 16f0692..948ea11 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -407,36 +407,28 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, - int Index, - unsigned ValueReg, - bool IsKill, + const MachineOperand *SrcDst, unsigned ScratchRsrcReg, - unsigned ScratchOffsetReg, - int64_t InstOffset, - MachineMemOperand *MMO, + unsigned ScratchOffset, + int64_t Offset, RegScavenger *RS) const { + unsigned Value = SrcDst->getReg(); + bool IsKill = SrcDst->isKill(); MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MI->getParent()->getParent(); const SISubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); - const MachineFrameInfo &MFI = MF->getFrameInfo(); - const MCInstrDesc &Desc = TII->get(LoadStoreOp); - const DebugLoc &DL = MI->getDebugLoc(); - bool IsStore = Desc.mayStore(); + DebugLoc DL = MI->getDebugLoc(); + bool IsStore = MI->mayStore(); bool RanOutOfSGPRs = false; bool Scavenged = false; - unsigned SOffset = ScratchOffsetReg; + unsigned SOffset = ScratchOffset; + unsigned OriginalImmOffset = Offset; - const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg); - unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32; + unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); unsigned Size = NumSubRegs * 4; - int64_t Offset = InstOffset + MFI.getObjectOffset(Index); - const int64_t OriginalImmOffset = Offset; - - unsigned Align = MFI.getObjectAlignment(Index); - const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo(); if (!isUInt<12>(Offset + Size)) { SOffset = AMDGPU::NoRegister; @@ -455,23 +447,19 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, // subtract the offset after the spill to return ScratchOffset to it's // original value. RanOutOfSGPRs = true; - SOffset = ScratchOffsetReg; + SOffset = ScratchOffset; } else { Scavenged = true; } - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset) - .addReg(ScratchOffsetReg) - .addImm(Offset); - + .addReg(ScratchOffset) + .addImm(Offset); Offset = 0; } - const unsigned EltSize = 4; - - for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) { + for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) { unsigned SubReg = NumSubRegs == 1 ? 
- ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i)); + Value : getSubReg(Value, getSubRegFromChannel(i)); unsigned SOffsetRegState = 0; unsigned SrcDstRegState = getDefRegState(!IsStore); @@ -481,12 +469,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, SrcDstRegState |= getKillRegState(IsKill); } - MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i); - MachineMemOperand *NewMMO - = MF->getMachineMemOperand(PInfo, MMO->getFlags(), - EltSize, MinAlign(Align, EltSize * i)); - - BuildMI(*MBB, MI, DL, Desc) + BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) .addReg(SubReg, getDefRegState(!IsStore)) .addReg(ScratchRsrcReg) .addReg(SOffset, SOffsetRegState) @@ -494,15 +477,14 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, .addImm(0) // glc .addImm(0) // slc .addImm(0) // tfe - .addMemOperand(NewMMO) - .addReg(ValueReg, RegState::Implicit | SrcDstRegState); + .addReg(Value, RegState::Implicit | SrcDstRegState) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); } - if (RanOutOfSGPRs) { // Subtract the offset we added to the ScratchOffset register. - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg) - .addReg(ScratchOffsetReg) - .addImm(OriginalImmOffset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffset) + .addReg(ScratchOffset) + .addImm(OriginalImmOffset); } } @@ -525,8 +507,6 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM; - const unsigned EltSize = 4; - // SubReg carries the "Kill" flag when SubReg == SuperReg. unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill); for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { @@ -548,12 +528,13 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, } int64_t FrOffset = FrameInfo.getObjectOffset(Index); + unsigned Size = FrameInfo.getObjectSize(Index); unsigned Align = FrameInfo.getObjectAlignment(Index); MachinePointerInfo PtrInfo - = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); + = MachinePointerInfo::getFixedStack(*MF, Index); MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - EltSize, MinAlign(Align, EltSize * i)); + Size, Align); unsigned OffsetReg = AMDGPU::M0; // Add i * 4 wave offset. @@ -626,12 +607,13 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, Mov.addReg(SuperReg, RegState::Implicit | SuperKillState); } + unsigned Size = FrameInfo.getObjectSize(Index); unsigned Align = FrameInfo.getObjectAlignment(Index); MachinePointerInfo PtrInfo - = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); + = MachinePointerInfo::getFixedStack(*MF, Index); MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - EltSize, MinAlign(Align, EltSize * i)); + Size, Align); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE)) .addReg(TmpReg, RegState::Kill) // src .addFrameIndex(Index) // vaddr @@ -672,19 +654,18 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int64_t FrOffset = FrameInfo.getObjectOffset(Index); - const unsigned EltSize = 4; - for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { unsigned SubReg = NumSubRegs == 1 ? 
SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i)); if (SpillToSMEM) { + unsigned Size = FrameInfo.getObjectSize(Index); unsigned Align = FrameInfo.getObjectAlignment(Index); MachinePointerInfo PtrInfo - = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); + = MachinePointerInfo::getFixedStack(*MF, Index); MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, - EltSize, MinAlign(Align, EltSize * i)); + Size, Align); unsigned OffsetReg = AMDGPU::M0; @@ -722,15 +703,16 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, } else { // Restore SGPR from a stack slot. // FIXME: We should use S_LOAD_DWORD here for VI. + unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned Align = FrameInfo.getObjectAlignment(Index); + unsigned Size = FrameInfo.getObjectSize(Index); MachinePointerInfo PtrInfo - = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); + = MachinePointerInfo::getFixedStack(*MF, Index); - MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, - MachineMemOperand::MOLoad, EltSize, - MinAlign(Align, EltSize * i)); + MachineMemOperand *MMO = MF->getMachineMemOperand( + PtrInfo, MachineMemOperand::MOLoad, Size, Align); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg) .addFrameIndex(Index) // vaddr @@ -738,7 +720,8 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, .addReg(MFI->getScratchWaveOffsetReg()) // soffset .addImm(i * 4) // offset .addMemOperand(MMO); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) + BuildMI(*MBB, MI, DL, + TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) .addReg(TmpReg, RegState::Kill) .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine); } @@ -794,38 +777,28 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_V128_SAVE: case AMDGPU::SI_SPILL_V96_SAVE: case AMDGPU::SI_SPILL_V64_SAVE: - case AMDGPU::SI_SPILL_V32_SAVE: { - const MachineOperand *VData = TII->getNamedOperand(*MI, - AMDGPU::OpName::vdata); + case AMDGPU::SI_SPILL_V32_SAVE: buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, - Index, - VData->getReg(), VData->isKill(), + TII->getNamedOperand(*MI, AMDGPU::OpName::vdata), TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(), - TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), - *MI->memoperands_begin(), - RS); + FrameInfo.getObjectOffset(Index) + + TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS); MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode())); MI->eraseFromParent(); break; - } case AMDGPU::SI_SPILL_V32_RESTORE: case AMDGPU::SI_SPILL_V64_RESTORE: case AMDGPU::SI_SPILL_V96_RESTORE: case AMDGPU::SI_SPILL_V128_RESTORE: case AMDGPU::SI_SPILL_V256_RESTORE: case AMDGPU::SI_SPILL_V512_RESTORE: { - const MachineOperand *VData = TII->getNamedOperand(*MI, - AMDGPU::OpName::vdata); - buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET, - Index, - VData->getReg(), VData->isKill(), + TII->getNamedOperand(*MI, AMDGPU::OpName::vdata), TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(), - TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), - *MI->memoperands_begin(), - RS); + FrameInfo.getObjectOffset(Index) + + TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS); MI->eraseFromParent(); break; } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h 
b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index bd83ef1..ed960c9 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -253,14 +253,9 @@ public: private: void buildSpillLoadStore(MachineBasicBlock::iterator MI, - unsigned LoadStoreOp, - int Index, - unsigned ValueReg, - bool ValueIsKill, - unsigned ScratchRsrcReg, - unsigned ScratchOffsetReg, - int64_t InstrOffset, - MachineMemOperand *MMO, + unsigned LoadStoreOp, const MachineOperand *SrcDst, + unsigned ScratchRsrcReg, unsigned ScratchOffset, + int64_t Offset, RegScavenger *RS) const; }; diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index 13383cb..226def9 100644 --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -26,9 +26,9 @@ ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] -; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 ; 8-byte Folded Spill ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] -; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:4 ; 8-byte Folded Spill ; Spill load ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill @@ -55,11 +55,11 @@ -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 ; 8-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:4 ; 8-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] @@ -108,9 +108,9 @@ endif: ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] -; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:16 ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:16 ; 8-byte Folded Spill ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] -; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:20 ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:20 ; 8-byte Folded Spill ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} @@ -133,11 +133,11 @@ endif: ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:16 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:16 ; 8-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:20 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:20 ; 8-byte Folded Reload ; VMEM: 
s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] @@ -187,9 +187,9 @@ end: ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] -; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[SAVEEXEC_LO_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[SAVEEXEC_LO_OFFSET:[0-9]+]] ; 8-byte Folded Spill ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] -; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET:[0-9]+]] ; 8-byte Folded Spill ; GCN: s_mov_b64 exec, [[CMP0]] ; GCN: s_waitcnt vmcnt(0) expcnt(0) @@ -208,7 +208,7 @@ end: ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC_LO]] -; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET]] ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET]] ; 8-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC_HI]] @@ -224,9 +224,9 @@ end: ; VMEM: v_mov_b32_e32 v[[FLOW_V_SAVEEXEC_LO:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]] -; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET:[0-9]+]] ; 8-byte Folded Spill ; VMEM: v_mov_b32_e32 v[[FLOW_V_SAVEEXEC_HI:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]] -; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET:[0-9]+]] ; 8-byte Folded Spill ; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], s7 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} @@ -255,11 +255,11 @@ end: ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_HI_LANE]] -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET]] ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET]] ; 8-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET]] ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET]] ; 8-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll index ed397f5..f267eb4 100644 --- a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll +++ b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll @@ -17,26 
+17,26 @@ ; Make sure scratch wave offset register is correctly incremented and ; then restored. ; SMEM: s_mov_b32 m0, s91{{$}} -; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Spill +; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill ; SMEM: s_add_u32 m0, s91, 0x100{{$}} -; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Spill +; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill ; SMEM: s_add_u32 m0, s91, 0x200{{$}} -; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Spill +; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill ; SMEM: s_add_u32 m0, s91, 0x300{{$}} -; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Spill +; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill ; SMEM: s_mov_b32 m0, s91{{$}} -; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Reload +; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload ; SMEM: s_add_u32 m0, s91, 0x100{{$}} ; SMEM: s_waitcnt lgkmcnt(0) -; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Reload +; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload ; SMEM: s_add_u32 m0, s91, 0x200{{$}} ; SMEM: s_waitcnt lgkmcnt(0) -; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Reload +; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload ; SMEM: s_add_u32 m0, s91, 0x300{{$}} ; SMEM: s_waitcnt lgkmcnt(0) -; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Reload +; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload ; ALL: s_endpgm define void @test(i32 addrspace(1)* %out, i32 %in) { diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll index 35a014b..52fa0be 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll @@ -20,8 +20,8 @@ ; VI-DAG: s_mov_b32 s15, 0xe80000 ; s11 is offset system SGPR -; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 4-byte Folded Spill -; GCN: buffer_load_dword v{{[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 4-byte Folded Reload +; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Spill +; GCN: buffer_load_dword v{{[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Reload ; GCN: NumVgprs: 256 ; GCN: ScratchSize: 1024 -- 2.7.4
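Note on the reverted logic: the commit being reverted made buildSpillLoadStore construct one MachineMemOperand per 32-bit element when splitting a multi-dword spill, instead of stamping the parent pseudo's whole-object memory information onto every split access. A minimal sketch of that per-element construction, reconstructed from the '-' lines above (InstOffset, Index, MMO, NumSubRegs, MFI, and MF are parameters and locals of the surrounding function and are assumed to be in scope):

    // One MMO per 4-byte element of the spill, describing exactly that
    // dword of the stack object rather than the whole object.
    const unsigned EltSize = 4;
    int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
    unsigned Align = MFI.getObjectAlignment(Index);
    const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();

    for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
      // Shift the base pointer info to element i and weaken the alignment
      // to what that element's offset actually guarantees.
      MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
      MachineMemOperand *NewMMO =
          MF->getMachineMemOperand(PInfo, MMO->getFlags(), EltSize,
                                   MinAlign(Align, EltSize * i));
      // Each split BUFFER_LOAD/STORE_DWORD is then emitted with
      // .addMemOperand(NewMMO).
    }

After the revert, the split accesses again carry whole-object memory information: the VGPR path copies the parent pseudo's memoperands with setMemRefs(), and the SGPR paths build their MMOs with FrameInfo.getObjectSize(Index). That is why the FileCheck annotations in the tests above move from "4-byte Folded Spill" back to "8-byte" and "16-byte Folded Spill" on what are still 4-byte buffer accesses.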