Replace the magic constant 4095 with a call to SIInstrInfo::getMaxMUBUFImmOffset().
Differential Revision: https://reviews.llvm.org/D144623
AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
// Don't fold null pointer.
if (Imm != NullPtr) {
- SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ SDValue HighBits =
+ CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
MachineSDNode *MovHighBits = CurDAG->getMachineNode(
AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
VAddr = SDValue(MovHighBits, 0);
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
- ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
+ ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i16);
return true;
}
}
// TODO: Should this be inside the render function? The iterator seems to
// move.
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
HighBits)
- .addImm(Offset & ~4095);
+ .addImm(Offset & ~MaxOffset);
return {{[=](MachineInstrBuilder &MIB) { // rsrc
MIB.addReg(Info->getScratchRSrcReg());
MIB.addImm(0);
},
[=](MachineInstrBuilder &MIB) { // offset
- MIB.addImm(Offset & 4095);
+ MIB.addImm(Offset & MaxOffset);
}}};
}
std::pair<Register, unsigned>
AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
- const unsigned MaxImm = 4095;
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
if (MRI.getType(BaseReg).isPointer())
BaseReg = B.buildPtrToInt(MRI.getType(OrigOffset), BaseReg).getReg(0);
- // If the immediate value is too big for the immoffset field, put the value
- // and -4096 into the immoffset field so that the value that is copied/added
- // for the voffset field is a multiple of 4096, and it stands more chance
- // of being CSEd with the copy/add for another similar load/store.
- // However, do not do that rounding down to a multiple of 4096 if that is a
- // negative number, as it appears to be illegal to have a negative offset
- // in the vgpr, even if adding the immediate offset makes it positive.
+ // If the immediate value is too big for the immoffset field, keep only the
+ // bits that fit in the immoffset field there. The remaining value that is
+ // copied/added for the voffset field is then a multiple of a large power of
+ // 2 (MaxImm + 1), and it stands more chance of being CSEd with the copy/add
+ // for another similar load/store.
+ // However, do not do that rounding down if the rounded-down value is a
+ // negative number, as it appears to be illegal to have a negative offset in
+ // the vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
- const unsigned MaxImm = 4095;
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
unsigned C1 = 0;
if (ImmOffset != 0) {
- // If the immediate value is too big for the immoffset field, put the value
- // and -4096 into the immoffset field so that the value that is copied/added
- // for the voffset field is a multiple of 4096, and it stands more chance
- // of being CSEd with the copy/add for another similar load/store.
- // However, do not do that rounding down to a multiple of 4096 if that is a
- // negative number, as it appears to be illegal to have a negative offset
- // in the vgpr, even if adding the immediate offset makes it positive.
+ // If the immediate value is too big for the immoffset field, keep only the
+ // bits that fit in the immoffset field there. The remaining value that is
+ // copied/added for the voffset field is then a multiple of a large power of
+ // 2 (MaxImm + 1), and it stands more chance of being CSEd with the copy/add
+ // for another similar load/store.
+ // However, do not do that rounding down if the rounded-down value is a
+ // negative number, as it appears to be illegal to have a negative offset in
+ // the vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
SDValue Offset, SelectionDAG &DAG) const {
SDLoc DL(Offset);
- const unsigned MaxImm = 4095;
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
SDValue N0 = Offset;
ConstantSDNode *C1 = nullptr;
if (C1) {
unsigned ImmOffset = C1->getZExtValue();
- // If the immediate value is too big for the immoffset field, put the value
- // and -4096 into the immoffset field so that the value that is copied/added
- // for the voffset field is a multiple of 4096, and it stands more chance
- // of being CSEd with the copy/add for another similar load/store.
- // However, do not do that rounding down to a multiple of 4096 if that is a
- // negative number, as it appears to be illegal to have a negative offset
- // in the vgpr, even if adding the immediate offset makes it positive.
+ // If the immediate value is too big for the immoffset field, keep only the
+ // bits that fit in the immoffset field there. The remaining value that is
+ // copied/added for the voffset field is then a multiple of a large power of
+ // 2 (MaxImm + 1), and it stands more chance of being CSEd with the copy/add
+ // for another similar load/store.
+ // However, do not do that rounding down if the rounded-down value is a
+ // negative number, as it appears to be illegal to have a negative offset in
+ // the vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
}
}
+unsigned SIInstrInfo::getMaxMUBUFImmOffset() { return (1 << 12) - 1; }
+
void SIInstrInfo::fixImplicitOperands(MachineInstr &MI) const {
if (!ST.isWave32())
return;
// offsets within the given alignment can be added to the resulting ImmOffset.
bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset,
uint32_t &ImmOffset, Align Alignment) const {
- const uint32_t MaxImm = alignDown(4095, Alignment.value());
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ const uint32_t MaxImm = alignDown(MaxOffset, Alignment.value());
uint32_t Overflow = 0;
if (Imm > MaxImm) {
//
// Atomic operations fail to work correctly when individual address
// components are unaligned, even if their sum is aligned.
- uint32_t High = (Imm + Alignment.value()) & ~4095;
- uint32_t Low = (Imm + Alignment.value()) & 4095;
+ uint32_t High = (Imm + Alignment.value()) & ~MaxOffset;
+ uint32_t Low = (Imm + Alignment.value()) & MaxOffset;
Imm = Low;
Overflow = High - Alignment.value();
}
return isUInt<12>(Imm);
}
+ static unsigned getMaxMUBUFImmOffset();
+
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
Align Alignment = Align(4)) const;