From 58f61a84e726691b7483f2bd3b47dd40b67df657 Mon Sep 17 00:00:00 2001 From: Marek Olsak Date: Sun, 7 Dec 2014 17:17:38 +0000 Subject: [PATCH] R600/SI: Set 20-bit immediate byte offset for SMRD on VI llvm-svn: 223614 --- llvm/lib/Target/R600/SIISelLowering.cpp | 7 +++++- llvm/lib/Target/R600/SIInstrInfo.cpp | 36 ++++++++++++++++++---------- llvm/lib/Target/R600/SIInstrInfo.h | 2 +- llvm/lib/Target/R600/SIInstrInfo.td | 4 ++++ llvm/lib/Target/R600/SIInstructions.td | 42 ++++++++++++++++++++++++++++++++- llvm/lib/Target/R600/VIInstructions.td | 14 +++++++---- 6 files changed, 85 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index 85ef37d..66a9c76 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -2156,7 +2156,12 @@ MachineSDNode *SITargetLowering::AdjustRegClass(MachineSDNode *N, SmallVector Ops; Ops.push_back(SDValue(RSrc, 0)); Ops.push_back(N->getOperand(0)); - Ops.push_back(DAG.getConstant(Offset->getSExtValue() << 2, MVT::i32)); + + // The immediate offset is in dwords on SI and in bytes on VI. + if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + Ops.push_back(DAG.getConstant(Offset->getSExtValue(), MVT::i32)); + else + Ops.push_back(DAG.getConstant(Offset->getSExtValue() << 2, MVT::i32)); // Copy remaining operands so we keep any chain and glue nodes that follow // the normal operands. 
diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp index 44e47e5..42f10f2 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.cpp +++ b/llvm/lib/Target/R600/SIInstrInfo.cpp @@ -971,15 +971,19 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, return RI.regClassCanUseInlineConstant(OpInfo.RegClass); } -bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) { +bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const { switch (AS) { case AMDGPUAS::GLOBAL_ADDRESS: { // MUBUF instructions a 12-bit offset in bytes. return isUInt<12>(OffsetSize); } case AMDGPUAS::CONSTANT_ADDRESS: { - // SMRD instructions have an 8-bit offset in dwords. - return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4); + // SMRD instructions have an 8-bit offset in dwords on SI and + // a 20-bit offset in bytes on VI. + if (RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + return isUInt<20>(OffsetSize); + else + return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4); } case AMDGPUAS::LOCAL_ADDRESS: case AMDGPUAS::REGION_ADDRESS: { @@ -1701,27 +1705,30 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI, getNamedOperand(*MI, AMDGPU::OpName::offset); const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase); + // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes + // on VI. if (OffOp) { + bool isVI = RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS; + unsigned OffScale = isVI ? 
1 : 4; // Handle the _IMM variant - unsigned LoOffset = OffOp->getImm(); - unsigned HiOffset = LoOffset + (HalfSize / 4); + unsigned LoOffset = OffOp->getImm() * OffScale; + unsigned HiOffset = LoOffset + HalfSize; Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo) .addOperand(*SBase) - .addImm(LoOffset); + .addImm(LoOffset / OffScale); - if (!isUInt<8>(HiOffset)) { + if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) { unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR) - .addImm(HiOffset << 2); // The immediate offset is in dwords, - // but offset in register is in bytes. + .addImm(HiOffset); // The offset in register is in bytes. Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi) .addOperand(*SBase) .addReg(OffsetSGPR); } else { Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi) .addOperand(*SBase) - .addImm(HiOffset); + .addImm(HiOffset / OffScale); } } else { // Handle the _SGPR variant @@ -1786,10 +1793,13 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con ImmOffset = 0; } else { assert(MI->getOperand(2).isImm()); - // SMRD instructions take a dword offsets and MUBUF instructions - // take a byte offset. - ImmOffset = MI->getOperand(2).getImm() << 2; + // SMRD instructions take a dword offset on SI and a byte offset on VI + // and MUBUF instructions always take a byte offset. 
+ ImmOffset = MI->getOperand(2).getImm(); + if (RI.ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) + ImmOffset <<= 2; RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + if (isUInt<12>(ImmOffset)) { BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), RegOffset) diff --git a/llvm/lib/Target/R600/SIInstrInfo.h b/llvm/lib/Target/R600/SIInstrInfo.h index 16fe9af..6d63816 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.h +++ b/llvm/lib/Target/R600/SIInstrInfo.h @@ -209,7 +209,7 @@ public: /// \brief Return true if the given offset Size in bytes can be folded into /// the immediate offsets of a memory instruction for the given address space. - static bool canFoldOffset(unsigned OffsetSize, unsigned AS) LLVM_READNONE; + bool canFoldOffset(unsigned OffsetSize, unsigned AS) const; /// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding. /// This function will return false if you pass it a 32-bit instruction. diff --git a/llvm/lib/Target/R600/SIInstrInfo.td b/llvm/lib/Target/R600/SIInstrInfo.td index 464eede..5736aad 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.td +++ b/llvm/lib/Target/R600/SIInstrInfo.td @@ -171,6 +171,10 @@ def IMM16bit : PatLeaf <(imm), [{return isUInt<16>(N->getZExtValue());}] >; +def IMM20bit : PatLeaf <(imm), + [{return isUInt<20>(N->getZExtValue());}] +>; + def IMM32bit : PatLeaf <(imm), [{return isUInt<32>(N->getZExtValue());}] >; diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 0e0e362..184a6f4 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -34,6 +34,9 @@ def isSICI : Predicate< >; def isCI : Predicate<"Subtarget.getGeneration() " ">= AMDGPUSubtarget::SEA_ISLANDS">; +def isVI : Predicate < + "Subtarget.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS" +>; def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">; @@ -1974,7 +1977,7 @@ def : Pat < multiclass SMRD_Pattern { - // 1. 
Offset as 8bit DWORD immediate + // 1. SI-CI: Offset as 8bit DWORD immediate def : Pat < (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))), (vt (Instr_IMM $sbase, (as_dword_i32imm $offset))) @@ -1993,6 +1996,28 @@ multiclass SMRD_Pattern { >; } +multiclass SMRD_Pattern_vi { + + // 1. VI: Offset as 20bit immediate in bytes + def : Pat < + (constant_load (add i64:$sbase, (i64 IMM20bit:$offset))), + (vt (Instr_IMM $sbase, (as_i32imm $offset))) + >; + + // 2. Offset loaded in an 32bit SGPR + def : Pat < + (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))), + (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset))))) + >; + + // 3. No offset at all + def : Pat < + (constant_load i64:$sbase), + (vt (Instr_IMM $sbase, 0)) + >; +} + +let Predicates = [isSICI] in { defm : SMRD_Pattern ; defm : SMRD_Pattern ; defm : SMRD_Pattern ; @@ -2000,6 +2025,19 @@ defm : SMRD_Pattern ; defm : SMRD_Pattern ; defm : SMRD_Pattern ; defm : SMRD_Pattern ; +} // End Predicates = [isSICI] + +let Predicates = [isVI] in { +defm : SMRD_Pattern_vi ; +defm : SMRD_Pattern_vi ; +defm : SMRD_Pattern_vi ; +defm : SMRD_Pattern_vi ; +defm : SMRD_Pattern_vi ; +defm : SMRD_Pattern_vi ; +defm : SMRD_Pattern_vi ; +} // End Predicates = [isVI] + +let Predicates = [isSICI] in { // 1. Offset as 8bit DWORD immediate def : Pat < @@ -2007,6 +2045,8 @@ def : Pat < (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset)) >; +} // End Predicates = [isSICI] + // 2. Offset loaded in an 32bit SGPR def : Pat < (SIload_constant v4i32:$sbase, imm:$offset), diff --git a/llvm/lib/Target/R600/VIInstructions.td b/llvm/lib/Target/R600/VIInstructions.td index 9fc46fb..733a66b 100644 --- a/llvm/lib/Target/R600/VIInstructions.td +++ b/llvm/lib/Target/R600/VIInstructions.td @@ -9,10 +9,6 @@ // Instruction definitions for VI and newer. 
//===----------------------------------------------------------------------===// -def isVI : Predicate < - "Subtarget.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS" ->; - let SubtargetPredicate = isVI in { def V_LDEXP_F32 : VOP3InstVI <0x288, "v_ldexp_f32", VOP_F32_F32_I32, @@ -54,6 +50,16 @@ def : Pat < >; //===----------------------------------------------------------------------===// +// SMEM Patterns +//===----------------------------------------------------------------------===// + +// 1. Offset as 20bit immediate in bytes +def : Pat < + (SIload_constant v4i32:$sbase, IMM20bit:$offset), + (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) +>; + +//===----------------------------------------------------------------------===// // MUBUF Patterns //===----------------------------------------------------------------------===// -- 2.7.4