From 2a4acf3ea8db19981284468c354aea2835fbfa08 Mon Sep 17 00:00:00 2001
From: Ben Shi
Date: Sun, 24 Jan 2021 11:04:37 +0800
Subject: [PATCH] [AVR] Optimize 8-bit int shift

Reviewed By: dylanmckay

Differential Revision: https://reviews.llvm.org/D90678
---
 llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | 108 +++++++++++++++++++++++++++
 llvm/lib/Target/AVR/AVRISelLowering.cpp      |  12 +++
 llvm/lib/Target/AVR/AVRISelLowering.h        |   3 +
 llvm/lib/Target/AVR/AVRInstrInfo.td          |  18 +++++
 llvm/test/CodeGen/AVR/shift.ll               |  26 +++++++
 llvm/test/CodeGen/AVR/smul-with-overflow.ll  |   9 +--
 6 files changed, 169 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index d10f014..a48d3d1 100644
--- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -1476,6 +1476,111 @@ bool AVRExpandPseudo::expand<AVR::ASRWRd>(Block &MBB, BlockIt MBBI) {
   return true;
 }
 
+template <>
+bool AVRExpandPseudo::expand<AVR::LSLB7Rd>(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool DstIsKill = MI.getOperand(1).isKill();
+  bool ImpIsDead = MI.getOperand(2).isDead();
+
+  // ror r24  ; C <- bit 0
+  // clr r24  ; r24 = 0, C unchanged
+  // ror r24  ; bit 7 <- C
+
+  buildMI(MBB, MBBI, AVR::RORRd)
+      .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstReg, getKillRegState(DstIsKill));
+
+  buildMI(MBB, MBBI, AVR::EORRdRr)
+      .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstReg, getKillRegState(DstIsKill))
+      .addReg(DstReg, getKillRegState(DstIsKill));
+
+  auto MIRRC =
+      buildMI(MBB, MBBI, AVR::RORRd)
+          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+          .addReg(DstReg, getKillRegState(DstIsKill));
+
+  if (ImpIsDead)
+    MIRRC->getOperand(2).setIsDead();
+
+  // SREG is always implicitly killed
+  MIRRC->getOperand(3).setIsKill();
+
+  MI.eraseFromParent();
+  return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LSRB7Rd>(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool DstIsKill = MI.getOperand(1).isKill();
+  bool ImpIsDead = MI.getOperand(2).isDead();
+
+  // rol r24  ; C <- bit 7
+  // clr r24  ; r24 = 0, C unchanged
+  // rol r24  ; bit 0 <- C
+
+  buildMI(MBB, MBBI, AVR::ADCRdRr)
+      .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstReg, getKillRegState(DstIsKill))
+      .addReg(DstReg, getKillRegState(DstIsKill));
+
+  buildMI(MBB, MBBI, AVR::EORRdRr)
+      .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstReg, getKillRegState(DstIsKill))
+      .addReg(DstReg, getKillRegState(DstIsKill));
+
+  auto MIRRC =
+      buildMI(MBB, MBBI, AVR::ADCRdRr)
+          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+          .addReg(DstReg, getKillRegState(DstIsKill))
+          .addReg(DstReg, getKillRegState(DstIsKill));
+
+  if (ImpIsDead)
+    MIRRC->getOperand(3).setIsDead();
+
+  // SREG is always implicitly killed
+  MIRRC->getOperand(4).setIsKill();
+
+  MI.eraseFromParent();
+  return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ASRB7Rd>(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool DstIsKill = MI.getOperand(1).isKill();
+  bool ImpIsDead = MI.getOperand(2).isDead();
+
+  // lsl r24       ; C <- sign bit
+  // sbc r24, r24  ; r24 = 0 - C, i.e. 0x00 or 0xff
+
+  buildMI(MBB, MBBI, AVR::ADDRdRr)
+      .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstReg, getKillRegState(DstIsKill))
+      .addReg(DstReg, getKillRegState(DstIsKill));
+
+  auto MIRRC = buildMI(MBB, MBBI, AVR::SBCRdRr)
+      .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstReg, getKillRegState(DstIsKill))
+      .addReg(DstReg, getKillRegState(DstIsKill));
+
+  if (ImpIsDead)
+    MIRRC->getOperand(3).setIsDead();
+
+  // SREG is always implicitly killed
+  MIRRC->getOperand(4).setIsKill();
+
+  MI.eraseFromParent();
+  return true;
+}
+
 template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
   MachineInstr &MI = *MBBI;
   Register DstLoReg, DstHiReg;
@@ -1697,6 +1802,9 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
     EXPAND(AVR::RORWRd);
     EXPAND(AVR::ROLWRd);
     EXPAND(AVR::ASRWRd);
+    EXPAND(AVR::LSLB7Rd);
+    EXPAND(AVR::LSRB7Rd);
+    EXPAND(AVR::ASRB7Rd);
     EXPAND(AVR::SEXT);
     EXPAND(AVR::ZEXT);
     EXPAND(AVR::SPREAD);
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index d919e08..3e7c298 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -349,6 +349,18 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
       Victim =
           DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0x0f, dl, VT));
       ShiftAmount -= 4;
+    } else if (Op.getOpcode() == ISD::SHL && ShiftAmount == 7) {
+      // Optimize LSL when ShiftAmount == 7.
+      Victim = DAG.getNode(AVRISD::LSL7, dl, VT, Victim);
+      ShiftAmount = 0;
+    } else if (Op.getOpcode() == ISD::SRL && ShiftAmount == 7) {
+      // Optimize LSR when ShiftAmount == 7.
+      Victim = DAG.getNode(AVRISD::LSR7, dl, VT, Victim);
+      ShiftAmount = 0;
+    } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 7) {
+      // Optimize ASR when ShiftAmount == 7.
+      Victim = DAG.getNode(AVRISD::ASR7, dl, VT, Victim);
+      ShiftAmount = 0;
     }
   }
 
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h
index 29d814b..7aff415 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -38,6 +38,9 @@ enum NodeType {
   LSL,     ///< Logical shift left.
   LSR,     ///< Logical shift right.
   ASR,     ///< Arithmetic shift right.
+  LSL7,    ///< Logical shift left 7 bits.
+  LSR7,    ///< Logical shift right 7 bits.
+  ASR7,    ///< Arithmetic shift right 7 bits.
   ROR,     ///< Bit rotate right.
   ROL,     ///< Bit rotate left.
   LSLLOOP, ///< A loop of single logical shift left instructions.
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index 926d1f8..9f7c16f 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -59,6 +59,9 @@ def AVRlsr : SDNode<"AVRISD::LSR", SDTIntUnaryOp>;
 def AVRrol : SDNode<"AVRISD::ROL", SDTIntUnaryOp>;
 def AVRror : SDNode<"AVRISD::ROR", SDTIntUnaryOp>;
 def AVRasr : SDNode<"AVRISD::ASR", SDTIntUnaryOp>;
+def AVRlsl7 : SDNode<"AVRISD::LSL7", SDTIntUnaryOp>;
+def AVRlsr7 : SDNode<"AVRISD::LSR7", SDTIntUnaryOp>;
+def AVRasr7 : SDNode<"AVRISD::ASR7", SDTIntUnaryOp>;
 
 // Pseudo shift nodes for non-constant shift amounts.
 def AVRlslLoop : SDNode<"AVRISD::LSLLOOP", SDTIntShiftOp>;
@@ -1666,6 +1669,11 @@ Defs = [SREG] in
                       "lslw\t$rd",
                       [(set i16:$rd, (AVRlsl i16:$src)), (implicit SREG)]>;
 
+  def LSLB7Rd : Pseudo<(outs GPR8:$rd), // i8 logical shift left by 7
+                       (ins GPR8:$src),
+                       "lslb7\t$rd",
+                       [(set i8:$rd, (AVRlsl7 i8:$src)), (implicit SREG)]>;
+
   def LSRRd : FRd<0b1001,
                   0b0100110,
                   (outs GPR8:$rd),
@@ -1673,6 +1681,11 @@ Defs = [SREG] in
                   "lsr\t$rd",
                   [(set i8:$rd, (AVRlsr i8:$src)), (implicit SREG)]>;
 
+  def LSRB7Rd : Pseudo<(outs GPR8:$rd), // i8 logical shift right by 7
+                       (ins GPR8:$src),
+                       "lsrb7\t$rd",
+                       [(set i8:$rd, (AVRlsr7 i8:$src)), (implicit SREG)]>;
+
   def LSRWRd : Pseudo<(outs DREGS:$rd),
                       (ins DREGS:$src),
                       "lsrw\t$rd",
@@ -1685,6 +1698,11 @@ Defs = [SREG] in
                   "asr\t$rd",
                   [(set i8:$rd, (AVRasr i8:$src)), (implicit SREG)]>;
 
+  def ASRB7Rd : Pseudo<(outs GPR8:$rd), // i8 arithmetic shift right by 7
+                       (ins GPR8:$src),
+                       "asrb7\t$rd",
+                       [(set i8:$rd, (AVRasr7 i8:$src)), (implicit SREG)]>;
+
   def ASRWRd : Pseudo<(outs DREGS:$rd),
                       (ins DREGS:$src),
                       "asrw\t$rd",
diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll
index e5d3aee..7a21a01 100644
--- a/llvm/test/CodeGen/AVR/shift.ll
+++ b/llvm/test/CodeGen/AVR/shift.ll
@@ -152,3 +152,29 @@ define i8 @lsr_i8_6(i8 %a) {
   %res = lshr i8 %a, 6
   ret i8 %res
 }
+
+define i8 @lsl_i8_7(i8 %a) {
+; CHECK-LABEL: lsl_i8_7
+; CHECK: ror r24
+; CHECK-NEXT: clr r24
+; CHECK-NEXT: ror r24
+  %result = shl i8 %a, 7
+  ret i8 %result
+}
+
+define i8 @lsr_i8_7(i8 %a) {
+; CHECK-LABEL: lsr_i8_7
+; CHECK: rol r24
+; CHECK-NEXT: clr r24
+; CHECK-NEXT: rol r24
+  %result = lshr i8 %a, 7
+  ret i8 %result
+}
+
+define i8 @asr_i8_7(i8 %a) {
+; CHECK-LABEL: asr_i8_7
+; CHECK: lsl r24
+; CHECK-NEXT: sbc r24, r24
+  %result = ashr i8 %a, 7
+  ret i8 %result
+}
diff --git a/llvm/test/CodeGen/AVR/smul-with-overflow.ll b/llvm/test/CodeGen/AVR/smul-with-overflow.ll
index 4004f1b..f2d2916 100644
--- a/llvm/test/CodeGen/AVR/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/AVR/smul-with-overflow.ll
@@ -13,13 +13,8 @@ entry-block:
 ; CHECK: muls r24, r22
 ; CHECK: mov [[HIGH:r[0-9]+]], r1
 ; CHECK: mov [[LOW:r[0-9]+]], r0
-; CHECK: asr {{.*}}[[LOW]]
-; CHECK: asr {{.*}}[[LOW]]
-; CHECK: asr {{.*}}[[LOW]]
-; CHECK: asr {{.*}}[[LOW]]
-; CHECK: asr {{.*}}[[LOW]]
-; CHECK: asr {{.*}}[[LOW]]
-; CHECK: asr {{.*}}[[LOW]]
+; CHECK: lsl {{.*}}[[LOW]]
+; CHECK: sbc {{.*}}[[LOW]], {{.*}}[[LOW]]
 ; CHECK: ldi [[RET:r[0-9]+]], 1
 ; CHECK: cp {{.*}}[[HIGH]], {{.*}}[[LOW]]
 ; CHECK: brne [[LABEL:.LBB[_0-9]+]]
-- 
2.7.4