From bce2e208e08f62dd362566e43b16cbfece459bec Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Sat, 26 Mar 2022 03:24:18 +0000 Subject: [PATCH] [AVR] Optimize int16 arithmetic right shift for shift amount 7/14/15 Reviewed By: aykevl Differential Revision: https://reviews.llvm.org/D115618 --- llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | 156 ++++++++++++++++++++++++++- llvm/lib/Target/AVR/AVRISelLowering.cpp | 23 +++- llvm/lib/Target/AVR/AVRInstrInfo.td | 2 +- llvm/test/CodeGen/AVR/pseudo/ASRWNRd.mir | 41 +++++++ llvm/test/CodeGen/AVR/shift.ll | 33 ++++++ llvm/test/CodeGen/AVR/sign-extension.ll | 2 +- 6 files changed, 251 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AVR/pseudo/ASRWNRd.mir diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 5618d88..de93c86 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -84,18 +84,23 @@ private: bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI); - /// Specific shift implementation. + /// Specific shift implementation for int8. bool expandLSLB7Rd(Block &MBB, BlockIt MBBI); bool expandLSRB7Rd(Block &MBB, BlockIt MBBI); bool expandASRB6Rd(Block &MBB, BlockIt MBBI); bool expandASRB7Rd(Block &MBB, BlockIt MBBI); + + /// Specific shift implementation for int16. bool expandLSLW4Rd(Block &MBB, BlockIt MBBI); bool expandLSRW4Rd(Block &MBB, BlockIt MBBI); + bool expandASRW7Rd(Block &MBB, BlockIt MBBI); bool expandLSLW8Rd(Block &MBB, BlockIt MBBI); bool expandLSRW8Rd(Block &MBB, BlockIt MBBI); bool expandASRW8Rd(Block &MBB, BlockIt MBBI); bool expandLSLW12Rd(Block &MBB, BlockIt MBBI); bool expandLSRW12Rd(Block &MBB, BlockIt MBBI); + bool expandASRW14Rd(Block &MBB, BlockIt MBBI); + bool expandASRW15Rd(Block &MBB, BlockIt MBBI); // Common implementation of LPMWRdZ and ELPMWRdZ. 
bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt); @@ -1401,7 +1406,7 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { // add hireg, hireg <==> lsl hireg auto MILSL = buildMI(MBB, MBBI, AVR::ADDRdRr) - .addReg(DstHiReg, RegState::Define, getDeadRegState(DstIsDead)) + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstHiReg, getKillRegState(DstIsKill)) .addReg(DstHiReg, getKillRegState(DstIsKill)); @@ -1820,6 +1825,53 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { return true; } +bool AVRExpandPseudo::expandASRW7Rd(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstLoReg, DstHiReg; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + bool ImpIsDead = MI.getOperand(3).isDead(); + TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // lsl r24 + // mov r24,r25 + // rol r24 + // sbc r25,r25 + + // lsl r24 <=> add r24, r24 + buildMI(MBB, MBBI, AVR::ADDRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstLoReg, RegState::Kill) + .addReg(DstLoReg, RegState::Kill); + + // mov r24, r25 + buildMI(MBB, MBBI, AVR::MOVRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg); + + // rol r24 <=> adc r24, r24 + buildMI(MBB, MBBI, AVR::ADCRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstLoReg, getKillRegState(DstIsKill)) + .addReg(DstLoReg, getKillRegState(DstIsKill)); + + // sbc r25, r25 + auto MISBC = + buildMI(MBB, MBBI, AVR::SBCRdRr) + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg, getKillRegState(DstIsKill)) + .addReg(DstHiReg, getKillRegState(DstIsKill)); + + if (ImpIsDead) + MISBC->getOperand(3).setIsDead(); + // SREG is always implicitly killed + MISBC->getOperand(4).setIsKill(); + + MI.eraseFromParent(); + return true; +} + bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, 
BlockIt MBBI) { MachineInstr &MI = *MBBI; Register DstLoReg, DstHiReg; @@ -1846,8 +1898,102 @@ bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) { .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstHiReg, getKillRegState(DstIsKill)) .addReg(DstHiReg, getKillRegState(DstIsKill)); + if (ImpIsDead) MIBHI->getOperand(3).setIsDead(); + // SREG is always implicitly killed + MIBHI->getOperand(4).setIsKill(); + + MI.eraseFromParent(); + return true; +} +bool AVRExpandPseudo::expandASRW14Rd(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstLoReg, DstHiReg; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + bool ImpIsDead = MI.getOperand(3).isDead(); + TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // lsl r25 + // sbc r24, r24 + // lsl r25 + // mov r25, r24 + // rol r24 + + // lsl r25 <=> add r25, r25 + buildMI(MBB, MBBI, AVR::ADDRdRr) + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg, RegState::Kill) + .addReg(DstHiReg, RegState::Kill); + + // sbc r24, r24 + buildMI(MBB, MBBI, AVR::SBCRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstLoReg, RegState::Kill) + .addReg(DstLoReg, RegState::Kill); + + // lsl r25 <=> add r25, r25 + buildMI(MBB, MBBI, AVR::ADDRdRr) + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg, RegState::Kill) + .addReg(DstHiReg, RegState::Kill); + + // mov r25, r24 + buildMI(MBB, MBBI, AVR::MOVRdRr) + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstLoReg); + + // rol r24 <=> adc r24, r24 + auto MIROL = + buildMI(MBB, MBBI, AVR::ADCRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstLoReg, getKillRegState(DstIsKill)) + .addReg(DstLoReg, getKillRegState(DstIsKill)); + + if (ImpIsDead) + MIROL->getOperand(3).setIsDead(); + // SREG is always 
implicitly killed + MIROL->getOperand(4).setIsKill(); + + MI.eraseFromParent(); + return true; +} + +bool AVRExpandPseudo::expandASRW15Rd(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstLoReg, DstHiReg; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool ImpIsDead = MI.getOperand(3).isDead(); + TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // lsl r25 + // sbc r25, r25 + // mov r24, r25 + + // lsl r25 <=> add r25, r25 + buildMI(MBB, MBBI, AVR::ADDRdRr) + .addReg(DstHiReg, RegState::Define) + .addReg(DstHiReg, RegState::Kill) + .addReg(DstHiReg, RegState::Kill); + + // sbc r25, r25 + auto MISBC = + buildMI(MBB, MBBI, AVR::SBCRdRr) + .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg, RegState::Kill) + .addReg(DstHiReg, RegState::Kill); + if (ImpIsDead) + MISBC->getOperand(3).setIsDead(); + // SREG is always implicitly killed + MISBC->getOperand(4).setIsKill(); + + // mov r24, r25 + buildMI(MBB, MBBI, AVR::MOVRdRr) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstHiReg); MI.eraseFromParent(); return true; @@ -1858,8 +2004,14 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned Imm = MI.getOperand(2).getImm(); switch (Imm) { + case 7: + return expandASRW7Rd(MBB, MBBI); case 8: return expandASRW8Rd(MBB, MBBI); + case 14: + return expandASRW14Rd(MBB, MBBI); + case 15: + return expandASRW15Rd(MBB, MBBI); default: llvm_unreachable("unimplemented asrwn"); return false; diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index 252331e..7a1e7b1 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -270,8 +270,6 @@ EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, } SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { - //: TODO: this function has to be 
completely rewritten to produce optimal - // code, for now it's producing very long but correct code. unsigned Opc8; const SDNode *N = Op.getNode(); EVT VT = Op.getValueType(); @@ -372,6 +370,27 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { ShiftAmount = 0; } } else if (VT.getSizeInBits() == 16) { + if (Op.getOpcode() == ISD::SRA) + // Special optimization for int16 arithmetic right shift. + switch (ShiftAmount) { + case 15: + Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim, + DAG.getConstant(15, dl, VT)); + ShiftAmount = 0; + break; + case 14: + Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim, + DAG.getConstant(14, dl, VT)); + ShiftAmount = 0; + break; + case 7: + Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim, + DAG.getConstant(7, dl, VT)); + ShiftAmount = 0; + break; + default: + break; + } if (4 <= ShiftAmount && ShiftAmount < 8) switch (Op.getOpcode()) { case ISD::SHL: diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index c67f202..9b45b50 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -1943,7 +1943,7 @@ let Constraints = "$src = $rd", Defs = [SREG] in { : $src)), (implicit SREG)]>; - def ASRWNRd : Pseudo<(outs DLDREGS + def ASRWNRd : Pseudo<(outs DREGS : $rd), (ins DREGS : $src, imm16 diff --git a/llvm/test/CodeGen/AVR/pseudo/ASRWNRd.mir b/llvm/test/CodeGen/AVR/pseudo/ASRWNRd.mir new file mode 100644 index 0000000..943421a --- /dev/null +++ b/llvm/test/CodeGen/AVR/pseudo/ASRWNRd.mir @@ -0,0 +1,41 @@ +# RUN: llc -O0 -run-pass=avr-expand-pseudo %s -o - | FileCheck %s + +--- | + target triple = "avr--" + define void @test() { + entry: + ret void + } +... 
+ +--- +name: test +body: | + bb.0.entry: + liveins: $r15r14, $r13r12, $r11r10, $r17r16 + + ; CHECK-LABEL: test + + ; CHECK: $r14 = ADDRdRr killed $r14, killed $r14, implicit-def $sreg + ; CHECK-NEXT: $r14 = MOVRdRr $r15 + ; CHECK-NEXT: $r14 = ADCRdRr $r14, $r14, implicit-def $sreg, implicit $sreg + ; CHECK-NEXT: $r15 = SBCRdRr $r15, $r15, implicit-def $sreg, implicit killed $sreg + $r15r14 = ASRWNRd $r15r14, 7, implicit-def $sreg + + ; CHECK-NEXT: $r12 = MOVRdRr $r13 + ; CHECK-NEXT: $r13 = ADDRdRr killed $r13, killed $r13, implicit-def $sreg + ; CHECK-NEXT: $r13 = SBCRdRr $r13, $r13, implicit-def $sreg, implicit killed $sreg + $r13r12 = ASRWNRd $r13r12, 8, implicit-def $sreg + + ; CHECK-NEXT: $r11 = ADDRdRr killed $r11, killed $r11, implicit-def $sreg + ; CHECK-NEXT: $r10 = SBCRdRr killed $r10, killed $r10, implicit-def $sreg, implicit $sreg + ; CHECK-NEXT: $r11 = ADDRdRr killed $r11, killed $r11, implicit-def $sreg + ; CHECK-NEXT: $r11 = MOVRdRr $r10 + ; CHECK-NEXT: $r10 = ADCRdRr $r10, $r10, implicit-def $sreg, implicit killed $sreg + $r11r10 = ASRWNRd $r11r10, 14, implicit-def $sreg + + ; CHECK-NEXT: $r17 = ADDRdRr killed $r17, killed $r17, implicit-def $sreg + ; CHECK-NEXT: $r17 = SBCRdRr killed $r17, killed $r17, implicit-def $sreg, implicit killed $sreg + ; CHECK-NEXT: $r16 = MOVRdRr $r17 + $r17r16 = ASRWNRd $r17r16, 15, implicit-def $sreg +... 
diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll index beba537..46993bc 100644 --- a/llvm/test/CodeGen/AVR/shift.ll +++ b/llvm/test/CodeGen/AVR/shift.ll @@ -301,6 +301,17 @@ define i16 @lsr_i16_13(i16 %a) { ret i16 %result } +define i16 @asr_i16_7(i16 %a) { +; CHECK-LABEL: asr_i16_7 +; CHECK: lsl r24 +; CHECK-NEXT: mov r24, r25 +; CHECK-NEXT: rol r24 +; CHECK-NEXT: sbc r25, r25 +; CHECK-NEXT: ret + %result = ashr i16 %a, 7 + ret i16 %result +} + define i16 @asr_i16_9(i16 %a) { ; CHECK-LABEL: asr_i16_9 ; CHECK: mov r24, r25 @@ -325,3 +336,25 @@ define i16 @asr_i16_12(i16 %a) { %result = ashr i16 %a, 12 ret i16 %result } + +define i16 @asr_i16_14(i16 %a) { +; CHECK-LABEL: asr_i16_14 +; CHECK: lsl r25 +; CHECK-NEXT: sbc r24, r24 +; CHECK-NEXT: lsl r25 +; CHECK-NEXT: mov r25, r24 +; CHECK-NEXT: rol r24 +; CHECK-NEXT: ret + %result = ashr i16 %a, 14 + ret i16 %result +} + +define i16 @asr_i16_15(i16 %a) { +; CHECK-LABEL: asr_i16_15 +; CHECK: lsl r25 +; CHECK-NEXT: sbc r25, r25 +; CHECK-NEXT: mov r24, r25 +; CHECK-NEXT: ret + %result = ashr i16 %a, 15 + ret i16 %result +} diff --git a/llvm/test/CodeGen/AVR/sign-extension.ll b/llvm/test/CodeGen/AVR/sign-extension.ll index be2bea7..116617a 100644 --- a/llvm/test/CodeGen/AVR/sign-extension.ll +++ b/llvm/test/CodeGen/AVR/sign-extension.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=avr < %s | FileCheck %s +; RUN: llc -march=avr -verify-machineinstrs < %s | FileCheck %s define i8 @sign_extended_1_to_8(i1) { ; CHECK-LABEL: sign_extended_1_to_8 -- 2.7.4