From a3d7b3121cfd038c21c3b19002aa1d695de993c3 Mon Sep 17 00:00:00 2001 From: Piotr Sobczak Date: Thu, 23 Feb 2023 08:46:18 +0100 Subject: [PATCH] [AMDGPU][NFC] Add getMaxMUBUFImmOffset Replace magic constant 4095 with the function getMaxMUBUFImmOffset(). Differential Revision: https://reviews.llvm.org/D144623 --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 6 ++++-- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 5 +++-- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 17 +++++++++-------- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 17 +++++++++-------- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 17 +++++++++-------- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 9 ++++++--- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 ++ 7 files changed, 42 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 4375983..35ff1c2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1375,13 +1375,15 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent, AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS); // Don't fold null pointer. if (Imm != NullPtr) { - SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32); + const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(); + SDValue HighBits = + CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32); MachineSDNode *MovHighBits = CurDAG->getMachineNode( AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits); VAddr = SDValue(MovHighBits, 0); SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); - ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16); + ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i16); return true; } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 583d853..edc841a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4195,9 +4195,10 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { // TODO: Should this be inside the render function? The iterator seems to // move. + const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(); BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), HighBits) - .addImm(Offset & ~4095); + .addImm(Offset & ~MaxOffset); return {{[=](MachineInstrBuilder &MIB) { // rsrc MIB.addReg(Info->getScratchRSrcReg()); @@ -4211,7 +4212,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { MIB.addImm(0); }, [=](MachineInstrBuilder &MIB) { // offset - MIB.addImm(Offset & 4095); + MIB.addImm(Offset & MaxOffset); }}}; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 52f0629..f5805c0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4255,7 +4255,7 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, std::pair AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const { - const unsigned MaxImm = 4095; + const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(); Register BaseReg; unsigned ImmOffset; const LLT S32 = LLT::scalar(32); @@ -4268,13 +4268,14 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B, if (MRI.getType(BaseReg).isPointer()) BaseReg = B.buildPtrToInt(MRI.getType(OrigOffset), BaseReg).getReg(0); - // If the immediate value is too big for the immoffset field, put the value - // and -4096 into the immoffset field so that the value that is copied/added - // for the voffset field is a multiple of 4096, and it stands more chance - // of being CSEd with the copy/add for another similar load/store. - // However, do not do that rounding down to a multiple of 4096 if that is a - // negative number, as it appears to be illegal to have a negative offset - // in the vgpr, even if adding the immediate offset makes it positive. + // If the immediate value is too big for the immoffset field, put only bits + // that would normally fit in the immoffset field. The remaining value that + // is copied/added for the voffset field is a large power of 2, and it + // stands more chance of being CSEd with the copy/add for another similar + // load/store. + // However, do not do that rounding down if that is a negative + // number, as it appears to be illegal to have a negative offset in the + // vgpr, even if adding the immediate offset makes it positive. unsigned Overflow = ImmOffset & ~MaxImm; ImmOffset -= Overflow; if ((int32_t)Overflow < 0) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 7e4dfd9..baad6e05 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1791,7 +1791,7 @@ getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) { std::pair AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const { - const unsigned MaxImm = 4095; + const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(); Register BaseReg; unsigned ImmOffset; const LLT S32 = LLT::scalar(32); @@ -1802,13 +1802,14 @@ AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B, unsigned C1 = 0; if (ImmOffset != 0) { - // If the immediate value is too big for the immoffset field, put the value - // and -4096 into the immoffset field so that the value that is copied/added - // for the voffset field is a multiple of 4096, and it stands more chance - // of being CSEd with the copy/add for another similar load/store. - // However, do not do that rounding down to a multiple of 4096 if that is a - // negative number, as it appears to be illegal to have a negative offset - // in the vgpr, even if adding the immediate offset makes it positive. + // If the immediate value is too big for the immoffset field, put only bits + // that would normally fit in the immoffset field. The remaining value that + // is copied/added for the voffset field is a large power of 2, and it + // stands more chance of being CSEd with the copy/add for another similar + // load/store. + // However, do not do that rounding down if that is a negative + // number, as it appears to be illegal to have a negative offset in the + // vgpr, even if adding the immediate offset makes it positive. unsigned Overflow = ImmOffset & ~MaxImm; ImmOffset -= Overflow; if ((int32_t)Overflow < 0) { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c247abf..1eb2e29 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8495,7 +8495,7 @@ SDValue SITargetLowering::makeV_ILLEGAL(SDValue Op, SelectionDAG & DAG) const { std::pair SITargetLowering::splitBufferOffsets( SDValue Offset, SelectionDAG &DAG) const { SDLoc DL(Offset); - const unsigned MaxImm = 4095; + const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(); SDValue N0 = Offset; ConstantSDNode *C1 = nullptr; @@ -8508,13 +8508,14 @@ std::pair SITargetLowering::splitBufferOffsets( if (C1) { unsigned ImmOffset = C1->getZExtValue(); - // If the immediate value is too big for the immoffset field, put the value - // and -4096 into the immoffset field so that the value that is copied/added - // for the voffset field is a multiple of 4096, and it stands more chance - // of being CSEd with the copy/add for another similar load/store. - // However, do not do that rounding down to a multiple of 4096 if that is a - // negative number, as it appears to be illegal to have a negative offset - // in the vgpr, even if adding the immediate offset makes it positive. + // If the immediate value is too big for the immoffset field, put only bits + // that would normally fit in the immoffset field. The remaining value that + // is copied/added for the voffset field is a large power of 2, and it + // stands more chance of being CSEd with the copy/add for another similar + // load/store. + // However, do not do that rounding down if that is a negative + // number, as it appears to be illegal to have a negative offset in the + // vgpr, even if adding the immediate offset makes it positive. unsigned Overflow = ImmOffset & ~MaxImm; ImmOffset -= Overflow; if ((int32_t)Overflow < 0) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index b9b5091..463d89b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -7874,6 +7874,8 @@ const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) con } } +unsigned SIInstrInfo::getMaxMUBUFImmOffset() { return (1 << 12) - 1; } + void SIInstrInfo::fixImplicitOperands(MachineInstr &MI) const { if (!ST.isWave32()) return; @@ -7906,7 +7908,8 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const { // offsets within the given alignment can be added to the resulting ImmOffset. bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment) const { - const uint32_t MaxImm = alignDown(4095, Alignment.value()); + const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(); + const uint32_t MaxImm = alignDown(MaxOffset, Alignment.value()); uint32_t Overflow = 0; if (Imm > MaxImm) { @@ -7924,8 +7927,8 @@ bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, // // Atomic operations fail to work correctly when individual address // components are unaligned, even if their sum is aligned. - uint32_t High = (Imm + Alignment.value()) & ~4095; - uint32_t Low = (Imm + Alignment.value()) & 4095; + uint32_t High = (Imm + Alignment.value()) & ~MaxOffset; + uint32_t Low = (Imm + Alignment.value()) & MaxOffset; Imm = Low; Overflow = High - Alignment.value(); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 9dfa986..be1bc0d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1135,6 +1135,8 @@ public: return isUInt<12>(Imm); } + static unsigned getMaxMUBUFImmOffset(); + bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment = Align(4)) const; -- 2.7.4