From 91ae1afd3cb9d9a583490ee9eef219287603a18a Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Wed, 23 Nov 2022 18:18:07 +0100 Subject: [PATCH] [AVR] Remove unused register scavenger The LPMW/ELPMW instruction can be modified to use an earlyclobber, which prevents it from using the Z register as an output register. Also see: https://reviews.llvm.org/D131844 Differential Revision: https://reviews.llvm.org/D117957 --- llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | 57 ++-------------------------- llvm/lib/Target/AVR/AVRInstrInfo.td | 2 + llvm/test/CodeGen/AVR/elpm.ll | 24 ++++++------ llvm/test/CodeGen/AVR/pseudo/ELPMWRdZ.mir | 32 ++++++++++++++++ 4 files changed, 50 insertions(+), 65 deletions(-) create mode 100644 llvm/test/CodeGen/AVR/pseudo/ELPMWRdZ.mir diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 076b3f62..db793cc 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetRegisterInfo.h" using namespace llvm; @@ -104,9 +103,6 @@ private: // Common implementation of LPMWRdZ and ELPMWRdZ. bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt); - - /// Scavenges a free GPR8 register for use. - Register scavengeGPR8(MachineInstr &MI); }; char AVRExpandPseudo::ID = 0; @@ -131,9 +127,6 @@ bool AVRExpandPseudo::runOnMachineFunction(MachineFunction &MF) { TRI = STI.getRegisterInfo(); TII = STI.getInstrInfo(); - // We need to track liveness in order to use register scavenging. - MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness); - for (Block &MBB : MF) { bool ContinueExpanding = true; unsigned ExpandCount = 0; @@ -783,7 +776,6 @@ bool AVRExpandPseudo::expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt) { MachineInstr &MI = *MBBI; Register DstLoReg, DstHiReg; Register DstReg = MI.getOperand(0).getReg(); - Register TmpReg = 0; // 0 for no temporary register Register SrcReg = MI.getOperand(1).getReg(); bool SrcIsKill = MI.getOperand(1).isKill(); unsigned OpLo = IsExt ? AVR::ELPMRdZPi : AVR::LPMRdZPi; @@ -798,35 +790,19 @@ bool AVRExpandPseudo::expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt) { buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(Bank); } - // Use a temporary register if src and dst registers are the same. - if (DstReg == SrcReg) - TmpReg = scavengeGPR8(MI); - - Register CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg; - Register CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg; + // This is enforced by the @earlyclobber constraint. + assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same"); // Load low byte. auto MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(CurDstLoReg, RegState::Define) + .addReg(DstLoReg, RegState::Define) .addReg(SrcReg); - // Push low byte onto stack if necessary. - if (TmpReg) - buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg); - // Load high byte. auto MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(CurDstHiReg, RegState::Define) + .addReg(DstHiReg, RegState::Define) .addReg(SrcReg, getKillRegState(SrcIsKill)); - if (TmpReg) { - // Move the high byte into the final destination. - buildMI(MBB, MBBI, AVR::MOVRdRr, DstHiReg).addReg(TmpReg); - - // Move the low byte from the scratch space into the final destination. - buildMI(MBB, MBBI, AVR::POPRd, DstLoReg); - } - MIBLO.setMemRefs(MI.memoperands()); MIBHI.setMemRefs(MI.memoperands()); @@ -924,31 +900,6 @@ bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode, Block &MBB, return expandAtomicBinaryOp(Opcode, MBB, MBBI, [](MachineInstr &MI) {}); } -Register AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) { - MachineBasicBlock &MBB = *MI.getParent(); - RegScavenger RS; - - RS.enterBasicBlock(MBB); - RS.forward(MI); - - BitVector Candidates = - TRI->getAllocatableSet(*MBB.getParent(), &AVR::GPR8RegClass); - - // Exclude all the registers being used by the instruction. - for (MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() && - !Register::isVirtualRegister(MO.getReg())) - Candidates.reset(MO.getReg()); - } - - BitVector Available = RS.getRegsAvailable(&AVR::GPR8RegClass); - Available &= Candidates; - - signed Reg = Available.find_first(); - assert(Reg != -1 && "ran out of registers"); - return Reg; -} - template <> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { return expandAtomicBinaryOp(AVR::LDRdPtr, MBB, MBBI); diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index e3f93e6..47dce53 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -1673,6 +1673,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1, mayLoad = 1, "lpm\t$rd, $z+", []>, Requires<[HasLPMX]>; + let Constraints = "@earlyclobber $dst" in def LPMWRdZ : Pseudo<(outs DREGS : $dst), (ins ZREG @@ -1712,6 +1713,7 @@ let mayLoad = 1, hasSideEffects = 0 in { "elpmb\t$dst, $z, $p", []>, Requires<[HasELPMX]>; + let Constraints = "@earlyclobber $dst" in def ELPMWRdZ : Pseudo<(outs DREGS:$dst), (ins ZREG:$z, LD8:$p), "elpmw\t$dst, $z, $p", []>, Requires<[HasELPMX]>; diff --git a/llvm/test/CodeGen/AVR/elpm.ll b/llvm/test/CodeGen/AVR/elpm.ll index e60ceb5..a18f133 100644 --- a/llvm/test/CodeGen/AVR/elpm.ll +++ b/llvm/test/CodeGen/AVR/elpm.ll @@ -44,8 +44,8 @@ define i16 @foo1(i16 %a, i16 %b) { ; CHECK-NEXT: movw r30, r22 ; CHECK-NEXT: ldi r18, 1 ; CHECK-NEXT: out 59, r18 -; CHECK-NEXT: elpm r18, Z+ -; CHECK-NEXT: elpm r19, Z +; CHECK-NEXT: elpm r20, Z+ +; CHECK-NEXT: elpm r21, Z ; CHECK-NEXT: lsl r24 ; CHECK-NEXT: rol r25 ; CHECK-NEXT: subi r24, -lo8(arr0) @@ -53,8 +53,8 @@ define i16 @foo1(i16 %a, i16 %b) { ; CHECK-NEXT: movw r30, r24 ; CHECK-NEXT: lpm r24, Z+ ; CHECK-NEXT: lpm r25, Z -; CHECK-NEXT: sub r24, r18 -; CHECK-NEXT: sbc r25, r19 +; CHECK-NEXT: sub r24, r20 +; CHECK-NEXT: sbc r25, r21 ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %a @@ -73,8 +73,8 @@ define i16 @foo2(i16 %a, i16 %b) { ; CHECK-NEXT: subi r24, -lo8(arr2) ; CHECK-NEXT: sbci r25, -hi8(arr2) ; CHECK-NEXT: movw r30, r24 -; CHECK-NEXT: ldi r24, 2 -; CHECK-NEXT: out 59, r24 +; CHECK-NEXT: ldi r18, 2 +; CHECK-NEXT: out 59, r18 ; CHECK-NEXT: elpm r24, Z+ ; CHECK-NEXT: elpm r25, Z ; CHECK-NEXT: lsl r22 @@ -106,19 +106,19 @@ define i16 @foo3(i16 %a, i16 %b) { ; CHECK-NEXT: movw r30, r22 ; CHECK-NEXT: ldi r18, 1 ; CHECK-NEXT: out 59, r18 -; CHECK-NEXT: elpm r18, Z+ -; CHECK-NEXT: elpm r19, Z +; CHECK-NEXT: elpm r20, Z+ +; CHECK-NEXT: elpm r21, Z ; CHECK-NEXT: lsl r24 ; CHECK-NEXT: rol r25 ; CHECK-NEXT: subi r24, -lo8(arr2) ; CHECK-NEXT: sbci r25, -hi8(arr2) ; CHECK-NEXT: movw r30, r24 -; CHECK-NEXT: ldi r24, 2 -; CHECK-NEXT: out 59, r24 +; CHECK-NEXT: ldi r18, 2 +; CHECK-NEXT: out 59, r18 ; CHECK-NEXT: elpm r24, Z+ ; CHECK-NEXT: elpm r25, Z -; CHECK-NEXT: sub r24, r18 -; CHECK-NEXT: sbc r25, r19 +; CHECK-NEXT: sub r24, r20 +; CHECK-NEXT: sbc r25, r21 ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(3)* @arr2, i16 0, i16 %a diff --git a/llvm/test/CodeGen/AVR/pseudo/ELPMWRdZ.mir b/llvm/test/CodeGen/AVR/pseudo/ELPMWRdZ.mir new file mode 100644 index 0000000..d4469fb --- /dev/null +++ b/llvm/test/CodeGen/AVR/pseudo/ELPMWRdZ.mir @@ -0,0 +1,32 @@ +# RUN: llc -mcpu=atmega1284p -start-before=greedy %s -o - | FileCheck %s + +# This test checks the expansion of the 16-bit ELPM pseudo instruction and that +# the register allocator won't use R31R30 as an output register (which would +# lead to undefined behavior). + +--- | + target triple = "avr--" + define void @test_elpmwrdz() { + entry: + ret void + } +... + +--- +name: test_elpmwrdz +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r31r30 + + ; CHECK-LABEL: test_elpmwrdz + ; CHECK: elpm r24, Z+ + ; CHECK-NEXT: elpm r25, Z + ; CHECK-NEXT: movw r30, r24 + + %1:zreg = COPY killed $r31r30 + %2:ld8 = LDIRdK 1 + %3:dregs = ELPMWRdZ %1, %2, implicit-def dead $r31r30 + $r31r30 = COPY %3 + RET implicit $r31r30 +... -- 2.7.4