[AMDGPU][NFC] Add getMaxMUBUFImmOffset

author Piotr Sobczak <Piotr.Sobczak@amd.com>

Thu, 23 Feb 2023 07:46:18 +0000 (08:46 +0100)

committer Piotr Sobczak <Piotr.Sobczak@amd.com>

Thu, 23 Feb 2023 10:29:59 +0000 (11:29 +0100)
author Piotr Sobczak <Piotr.Sobczak@amd.com>
Thu, 23 Feb 2023 07:46:18 +0000 (08:46 +0100)
committer Piotr Sobczak <Piotr.Sobczak@amd.com>
Thu, 23 Feb 2023 10:29:59 +0000 (11:29 +0100)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

index 4375983..35ff1c2 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1375,13 +1375,15 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
          AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
      // Don't fold null pointer.
      if (Imm != NullPtr) {
-      SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
+      const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+      SDValue HighBits =
+          CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
        MachineSDNode *MovHighBits = CurDAG->getMachineNode(
          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
        VAddr = SDValue(MovHighBits, 0);
  
        SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
-      ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
+      ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i16);
        return true;
      }
    }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

index 583d853..edc841a 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4195,9 +4195,10 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  
      // TODO: Should this be inside the render function? The iterator seems to
      // move.
+    const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
      BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
              HighBits)
-      .addImm(Offset & ~4095);
+        .addImm(Offset & ~MaxOffset);
  
      return {{[=](MachineInstrBuilder &MIB) { // rsrc
                 MIB.addReg(Info->getScratchRSrcReg());
@@ -4211,7 +4212,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
                 MIB.addImm(0);
               },
               [=](MachineInstrBuilder &MIB) { // offset
-               MIB.addImm(Offset & 4095);
+               MIB.addImm(Offset & MaxOffset);
               }}};
    }
  
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

index 52f0629..f5805c0 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4255,7 +4255,7 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
  std::pair<Register, unsigned>
  AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
                                          Register OrigOffset) const {
-  const unsigned MaxImm = 4095;
+  const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
    Register BaseReg;
    unsigned ImmOffset;
    const LLT S32 = LLT::scalar(32);
@@ -4268,13 +4268,14 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
    if (MRI.getType(BaseReg).isPointer())
      BaseReg = B.buildPtrToInt(MRI.getType(OrigOffset), BaseReg).getReg(0);
  
-  // If the immediate value is too big for the immoffset field, put the value
-  // and -4096 into the immoffset field so that the value that is copied/added
-  // for the voffset field is a multiple of 4096, and it stands more chance
-  // of being CSEd with the copy/add for another similar load/store.
-  // However, do not do that rounding down to a multiple of 4096 if that is a
-  // negative number, as it appears to be illegal to have a negative offset
-  // in the vgpr, even if adding the immediate offset makes it positive.
+  // If the immediate value is too big for the immoffset field, put only bits
+  // that would normally fit in the immoffset field. The remaining value that
+  // is copied/added for the voffset field is a large power of 2, and it
+  // stands more chance of being CSEd with the copy/add for another similar
+  // load/store.
+  // However, do not do that rounding down if that is a negative
+  // number, as it appears to be illegal to have a negative offset in the
+  // vgpr, even if adding the immediate offset makes it positive.
    unsigned Overflow = ImmOffset & ~MaxImm;
    ImmOffset -= Overflow;
    if ((int32_t)Overflow < 0) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

index 7e4dfd9..baad6e0 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1791,7 +1791,7 @@ getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
  std::pair<Register, unsigned>
  AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
                                             Register OrigOffset) const {
-  const unsigned MaxImm = 4095;
+  const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
    Register BaseReg;
    unsigned ImmOffset;
    const LLT S32 = LLT::scalar(32);
@@ -1802,13 +1802,14 @@ AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
  
    unsigned C1 = 0;
    if (ImmOffset != 0) {
-    // If the immediate value is too big for the immoffset field, put the value
-    // and -4096 into the immoffset field so that the value that is copied/added
-    // for the voffset field is a multiple of 4096, and it stands more chance
-    // of being CSEd with the copy/add for another similar load/store.
-    // However, do not do that rounding down to a multiple of 4096 if that is a
-    // negative number, as it appears to be illegal to have a negative offset
-    // in the vgpr, even if adding the immediate offset makes it positive.
+    // If the immediate value is too big for the immoffset field, put only bits
+    // that would normally fit in the immoffset field. The remaining value that
+    // is copied/added for the voffset field is a large power of 2, and it
+    // stands more chance of being CSEd with the copy/add for another similar
+    // load/store.
+    // However, do not do that rounding down if that is a negative
+    // number, as it appears to be illegal to have a negative offset in the
+    // vgpr, even if adding the immediate offset makes it positive.
      unsigned Overflow = ImmOffset & ~MaxImm;
      ImmOffset -= Overflow;
      if ((int32_t)Overflow < 0) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp

index c247abf..1eb2e29 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8495,7 +8495,7 @@ SDValue SITargetLowering::makeV_ILLEGAL(SDValue Op, SelectionDAG & DAG) const {
  std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
      SDValue Offset, SelectionDAG &DAG) const {
    SDLoc DL(Offset);
-  const unsigned MaxImm = 4095;
+  const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
    SDValue N0 = Offset;
    ConstantSDNode *C1 = nullptr;
  
@@ -8508,13 +8508,14 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
  
    if (C1) {
      unsigned ImmOffset = C1->getZExtValue();
-    // If the immediate value is too big for the immoffset field, put the value
-    // and -4096 into the immoffset field so that the value that is copied/added
-    // for the voffset field is a multiple of 4096, and it stands more chance
-    // of being CSEd with the copy/add for another similar load/store.
-    // However, do not do that rounding down to a multiple of 4096 if that is a
-    // negative number, as it appears to be illegal to have a negative offset
-    // in the vgpr, even if adding the immediate offset makes it positive.
+    // If the immediate value is too big for the immoffset field, put only bits
+    // that would normally fit in the immoffset field. The remaining value that
+    // is copied/added for the voffset field is a large power of 2, and it
+    // stands more chance of being CSEd with the copy/add for another similar
+    // load/store.
+    // However, do not do that rounding down if that is a negative
+    // number, as it appears to be illegal to have a negative offset in the
+    // vgpr, even if adding the immediate offset makes it positive.
      unsigned Overflow = ImmOffset & ~MaxImm;
      ImmOffset -= Overflow;
      if ((int32_t)Overflow < 0) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

index b9b5091..463d89b 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7874,6 +7874,8 @@ const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) con
    }
  }
  
+unsigned SIInstrInfo::getMaxMUBUFImmOffset() { return (1 << 12) - 1; }
+
  void SIInstrInfo::fixImplicitOperands(MachineInstr &MI) const {
    if (!ST.isWave32())
      return;
@@ -7906,7 +7908,8 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
  // offsets within the given alignment can be added to the resulting ImmOffset.
  bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset,
                                     uint32_t &ImmOffset, Align Alignment) const {
-  const uint32_t MaxImm = alignDown(4095, Alignment.value());
+  const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+  const uint32_t MaxImm = alignDown(MaxOffset, Alignment.value());
    uint32_t Overflow = 0;
  
    if (Imm > MaxImm) {
@@ -7924,8 +7927,8 @@ bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset,
        //
        // Atomic operations fail to work correctly when individual address
        // components are unaligned, even if their sum is aligned.
-      uint32_t High = (Imm + Alignment.value()) & ~4095;
-      uint32_t Low = (Imm + Alignment.value()) & 4095;
+      uint32_t High = (Imm + Alignment.value()) & ~MaxOffset;
+      uint32_t Low = (Imm + Alignment.value()) & MaxOffset;
        Imm = Low;
        Overflow = High - Alignment.value();
      }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h

index 9dfa986..be1bc0d 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1135,6 +1135,8 @@ public:
      return isUInt<12>(Imm);
    }
  
+  static unsigned getMaxMUBUFImmOffset();
+
    bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                          Align Alignment = Align(4)) const;
author	Piotr Sobczak <Piotr.Sobczak@amd.com>
	Thu, 23 Feb 2023 07:46:18 +0000 (08:46 +0100)
committer	Piotr Sobczak <Piotr.Sobczak@amd.com>
	Thu, 23 Feb 2023 10:29:59 +0000 (11:29 +0100)
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIInstrInfo.h		patch \| blob \| history