From 5583972fe14e532c17d3d1a0607051e5adcbc7bb Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 28 Nov 2022 09:26:21 -0800 Subject: [PATCH] [RISCV] Simplify eliminateFrameIndex in advance of reuse [nfc-ish] The prior code intermixed several concerns - the actual materialization of the offset, the choice of destination register, and whether to prune the ADDI. This version factors the first part out, and then reasons only about the later two. My intention is to merge the adjustReg routine with the one from frame lowering, and then explore using the merged result to simplify frame setup and tear down. This change is conceptually NFC, but since it results in slightly different vreg usage, the end result can change register allocation in minor ways. Differential Revision: https://reviews.llvm.org/D138502 --- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 173 ++++++++++----------- llvm/lib/Target/RISCV/RISCVRegisterInfo.h | 4 + .../CodeGen/RISCV/rvv/addi-scalable-offset.mir | 4 +- llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir | 34 ++-- 4 files changed, 103 insertions(+), 112 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 57370eb..0767cec 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -161,6 +161,55 @@ bool RISCVRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, return true; } +void RISCVRegisterInfo::adjustReg(MachineBasicBlock::iterator II, Register DestReg, + Register SrcReg, StackOffset Offset) const { + + if (DestReg == SrcReg && !Offset.getFixed() && !Offset.getScalable()) + return; + + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const RISCVSubtarget &ST = MF.getSubtarget(); + const RISCVInstrInfo *TII = ST.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + MachineBasicBlock &MBB = *MI.getParent(); + + bool SrcRegIsKill = false; + + if (Offset.getScalable()) { + unsigned ScalableAdjOpc = RISCV::ADD; + int64_t ScalableValue = Offset.getScalable(); + if (ScalableValue < 0) { + ScalableValue = -ScalableValue; + ScalableAdjOpc = RISCV::SUB; + } + // Get vlenb and multiply vlen with the number of vector registers. + TII->getVLENFactoredAmount(MF, MBB, II, DL, DestReg, ScalableValue); + BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg) + .addReg(SrcReg).addReg(DestReg, RegState::Kill); + SrcReg = DestReg; + SrcRegIsKill = true; + } + + if (Offset.getFixed()) { + // TODO: Merge this with FrameLowerings adjustReg which knows a few + // more tricks than this does for fixed offsets. + if (isInt<12>(Offset.getFixed())) { + BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), DestReg) + .addReg(SrcReg, getKillRegState(SrcRegIsKill)) + .addImm(Offset.getFixed()); + } else { + Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + TII->movImm(MBB, II, DL, ScratchReg, Offset.getFixed()); + BuildMI(MBB, II, DL, TII->get(RISCV::ADD), DestReg) + .addReg(SrcReg, getKillRegState(SrcRegIsKill)) + .addReg(ScratchReg, RegState::Kill); + } + } +} + + bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { @@ -199,108 +248,46 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, "Frame offsets outside of the signed 32-bit range not supported"); } - MachineBasicBlock &MBB = *MI.getParent(); - bool FrameRegIsKill = false; - - // If the instruction is an ADDI, we can use it's destination as a scratch - // register. Load instructions might have an FP or vector destination and - // stores don't have a destination register. - Register DestReg; - if (MI.getOpcode() == RISCV::ADDI) - DestReg = MI.getOperand(0).getReg(); - - // If required, pre-compute the scalable factor amount which will be used in - // later offset computation. Since this sequence requires up to two scratch - // registers -- after which one is made free -- this grants us better - // scavenging of scratch registers as only up to two are live at one time, - // rather than three. - unsigned ScalableAdjOpc = RISCV::ADD; - if (Offset.getScalable()) { - int64_t ScalableValue = Offset.getScalable(); - if (ScalableValue < 0) { - ScalableValue = -ScalableValue; - ScalableAdjOpc = RISCV::SUB; + if (!IsRVVSpill) { + // TODO: Consider always storing the low bits of the immediate in the + // offset so that large immediate is cheaper to materialize? + if (isInt<12>(Offset.getFixed())) { + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); + Offset = StackOffset::get(0, Offset.getScalable()); + } else { + // Since we're going to materialize the full offset below, clear the + // portion encoded in the immediate. + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); } - // Use DestReg if it exists, otherwise create a new register. - if (!DestReg) - DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - // Get vlenb and multiply vlen with the number of vector registers. - TII->getVLENFactoredAmount(MF, MBB, II, DL, DestReg, ScalableValue); } - if (!isInt<12>(Offset.getFixed())) { - // The offset won't fit in an immediate, so use a scratch register instead - // Modify Offset and FrameReg appropriately. - - // Reuse destination register if it exists and is not holding a scalable - // offset. - Register ScratchReg = DestReg; - if (!DestReg || Offset.getScalable()) { - ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - // Also save to DestReg if it doesn't exist. - if (!DestReg) - DestReg = ScratchReg; - } - - TII->movImm(MBB, II, DL, ScratchReg, Offset.getFixed()); - BuildMI(MBB, II, DL, TII->get(RISCV::ADD), ScratchReg) - .addReg(FrameReg, getKillRegState(FrameRegIsKill)) - .addReg(ScratchReg, RegState::Kill); - // If this was an ADDI and there is no scalable offset, we can remove it. - if (MI.getOpcode() == RISCV::ADDI && !Offset.getScalable()) { - assert(MI.getOperand(0).getReg() == ScratchReg && - "Expected to have written ADDI destination register"); - MI.eraseFromParent(); - return true; - } - - Offset = StackOffset::get(0, Offset.getScalable()); - FrameReg = ScratchReg; - FrameRegIsKill = true; - } - - // Add in the scalable offset which has already been computed in DestReg. - if (Offset.getScalable()) { - assert(DestReg && "DestReg should be valid"); - BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg) - .addReg(FrameReg, getKillRegState(FrameRegIsKill)) - .addReg(DestReg, RegState::Kill); - // If this was an ADDI and there is no fixed offset, we can remove it. - if (MI.getOpcode() == RISCV::ADDI && !Offset.getFixed()) { - assert(MI.getOperand(0).getReg() == DestReg && - "Expected to have written ADDI destination register"); - MI.eraseFromParent(); - return true; - } - FrameReg = DestReg; - FrameRegIsKill = true; - } - - // Handle the fixed offset which might be zero. - if (IsRVVSpill) { - // RVVSpills don't have an immediate. Add an ADDI if the fixed offset is - // needed. - if (Offset.getFixed()) { - // Reuse DestReg if it exists, otherwise create a new register. - if (!DestReg) - DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), DestReg) - .addReg(FrameReg, getKillRegState(FrameRegIsKill)) - .addImm(Offset.getFixed()); - FrameReg = DestReg; - FrameRegIsKill = true; - } + if (Offset.getScalable() || Offset.getFixed()) { + Register DestReg; + if (MI.getOpcode() == RISCV::ADDI) + DestReg = MI.getOperand(0).getReg(); + else + DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + adjustReg(II, DestReg, FrameReg, Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(DestReg, /*IsDef*/false, + /*IsImp*/false, + /*IsKill*/true); } else { - // Otherwise we can replace the original immediate. - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*IsDef*/false, + /*IsImp*/false, + /*IsKill*/false); } - // Finally, replace the frame index operand. - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, - FrameRegIsKill); + // If after materializing the adjustment, we have a pointless ADDI, remove it + if (MI.getOpcode() == RISCV::ADDI && + MI.getOperand(0).getReg() == MI.getOperand(1).getReg() && + MI.getOperand(2).getImm() == 0) { + MI.eraseFromParent(); + return true; + } auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(MI.getOpcode()); if (ZvlssegInfo) { + MachineBasicBlock &MBB = *MI.getParent(); Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL); uint32_t ShiftAmount = Log2_32(ZvlssegInfo->second); diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index 4391147..674393d 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -38,6 +38,10 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg, int &FrameIdx) const override; + // Update DestReg to have the value of SrcReg plus an Offset. + void adjustReg(MachineBasicBlock::iterator II, Register DestReg, + Register SrcReg, StackOffset Offset) const; + bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir index e7b7ba0..1df2383 100644 --- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir +++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir @@ -44,10 +44,10 @@ body: | ; CHECK-NEXT: renamable $v8 = PseudoVLE64_V_M1 killed renamable $x10, $noreg, 6 /* e64 */, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) ; CHECK-NEXT: $x10 = PseudoReadVLENB ; CHECK-NEXT: $x10 = SLLI killed $x10, 1 + ; CHECK-NEXT: $x10 = SUB $x8, killed $x10 ; CHECK-NEXT: $x11 = LUI 1048575 ; CHECK-NEXT: $x11 = ADDIW killed $x11, 1824 - ; CHECK-NEXT: $x11 = ADD $x8, killed $x11 - ; CHECK-NEXT: $x10 = SUB killed $x11, killed $x10 + ; CHECK-NEXT: $x10 = ADD killed $x10, killed $x11 ; CHECK-NEXT: VS1R_V killed renamable $v8, killed renamable $x10 ; CHECK-NEXT: $x2 = frame-destroy ADDI $x8, -2048 ; CHECK-NEXT: $x2 = frame-destroy ADDI killed $x2, -224 diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir index a02f69be..a54ce83 100644 --- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir +++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir @@ -92,10 +92,10 @@ body: | ; CHECK-NEXT: $x10 = PseudoReadVLENB ; CHECK-NEXT: $x11 = ADDI killed $x0, 50 ; CHECK-NEXT: $x10 = MUL killed $x10, killed $x11 + ; CHECK-NEXT: $x10 = ADD $x2, killed $x10 ; CHECK-NEXT: $x11 = LUI 1 ; CHECK-NEXT: $x11 = ADDIW killed $x11, -1888 - ; CHECK-NEXT: $x11 = ADD $x2, killed $x11 - ; CHECK-NEXT: $x10 = ADD killed $x11, killed $x10 + ; CHECK-NEXT: $x10 = ADD killed $x10, killed $x11 ; CHECK-NEXT: PseudoVSPILL_M1 killed renamable $v25, killed $x10 :: (store unknown-size into %stack.1, align 8) ; CHECK-NEXT: renamable $x1 = ADDI $x0, 255 ; CHECK-NEXT: renamable $x5 = nuw ADDI $x2, 384 @@ -111,9 +111,11 @@ body: | ; CHECK-NEXT: renamable $x21 = ADDI $x2, 1664 ; CHECK-NEXT: renamable $x22 = ADDI $x2, 1792 ; CHECK-NEXT: renamable $x23 = ADDI $x2, 1920 - ; CHECK-NEXT: $x24 = LUI 1 - ; CHECK-NEXT: $x24 = ADDIW killed $x24, -2048 - ; CHECK-NEXT: $x24 = ADD $x2, killed $x24 + ; CHECK-NEXT: SD killed $x1, $x2, 8 :: (store (s64) into %stack.15) + ; CHECK-NEXT: SD killed $x5, $x2, 0 :: (store (s64) into %stack.16) + ; CHECK-NEXT: $x11 = LUI 1 + ; CHECK-NEXT: $x11 = ADDIW killed $x11, -2048 + ; CHECK-NEXT: $x24 = ADD $x2, killed $x11 ; CHECK-NEXT: renamable $x25 = ADDI $x2, 128 ; CHECK-NEXT: renamable $x26 = ADDI $x2, 128 ; CHECK-NEXT: renamable $x27 = ADDI $x0, 2 @@ -129,18 +131,16 @@ body: | ; CHECK-NEXT: renamable $x16 = SUB killed renamable $x13, renamable $x13 ; CHECK-NEXT: dead renamable $x13 = PseudoVSETIVLI 1, 64 /* e8, m1, ta, mu */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: renamable $x13 = nsw ADDI renamable $x16, -2 - ; CHECK-NEXT: SD killed $x10, $x2, 8 :: (store (s64) into %stack.15) - ; CHECK-NEXT: $x10 = PseudoReadVLENB - ; CHECK-NEXT: SD killed $x12, $x2, 0 :: (store (s64) into %stack.16) - ; CHECK-NEXT: $x12 = ADDI killed $x0, 50 - ; CHECK-NEXT: $x10 = MUL killed $x10, killed $x12 - ; CHECK-NEXT: $x12 = LUI 1 - ; CHECK-NEXT: $x12 = ADDIW killed $x12, -1888 - ; CHECK-NEXT: $x12 = ADD $x2, killed $x12 - ; CHECK-NEXT: $x10 = ADD killed $x12, killed $x10 - ; CHECK-NEXT: $x12 = LD $x2, 0 :: (load (s64) from %stack.16) - ; CHECK-NEXT: renamable $v0 = PseudoVRELOAD_M1 killed $x10 :: (load unknown-size from %stack.1, align 8) - ; CHECK-NEXT: $x10 = LD $x2, 8 :: (load (s64) from %stack.15) + ; CHECK-NEXT: $x1 = PseudoReadVLENB + ; CHECK-NEXT: $x5 = ADDI killed $x0, 50 + ; CHECK-NEXT: $x1 = MUL killed $x1, killed $x5 + ; CHECK-NEXT: $x1 = ADD $x2, killed $x1 + ; CHECK-NEXT: $x5 = LUI 1 + ; CHECK-NEXT: $x5 = ADDIW killed $x5, -1888 + ; CHECK-NEXT: $x1 = ADD killed $x1, killed $x5 + ; CHECK-NEXT: $x5 = LD $x2, 0 :: (load (s64) from %stack.16) + ; CHECK-NEXT: renamable $v0 = PseudoVRELOAD_M1 killed $x1 :: (load unknown-size from %stack.1, align 8) + ; CHECK-NEXT: $x1 = LD $x2, 8 :: (load (s64) from %stack.15) ; CHECK-NEXT: renamable $v0 = PseudoVSLIDEDOWN_VX_M1 undef renamable $v0, killed renamable $v0, killed renamable $x13, $noreg, 3 /* e8 */, 1 /* ta, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: renamable $x13 = PseudoVMV_X_S_M1 killed renamable $v0, 3 /* e8 */, implicit $vl, implicit $vtype ; CHECK-NEXT: BLT killed renamable $x16, renamable $x27, %bb.2 -- 2.7.4