// Create loop block.
MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *CheckBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(I, LoopBB);
+ F->insert(I, CheckBB);
F->insert(I, RemBB);
// Update machine-CFG edges by transferring all successors of the current
BB->end());
RemBB->transferSuccessorsAndUpdatePHIs(BB);
- // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB.
- BB->addSuccessor(LoopBB);
- BB->addSuccessor(RemBB);
- LoopBB->addSuccessor(RemBB);
- LoopBB->addSuccessor(LoopBB);
+ // Add edges BB => CheckBB => RemBB, CheckBB => LoopBB => CheckBB.
+ BB->addSuccessor(CheckBB);
+ LoopBB->addSuccessor(CheckBB);
+ CheckBB->addSuccessor(LoopBB);
+ CheckBB->addSuccessor(RemBB);
- Register ShiftAmtReg = RI.createVirtualRegister(&AVR::LD8RegClass);
- Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::LD8RegClass);
+ Register ShiftAmtReg = RI.createVirtualRegister(&AVR::GPR8RegClass);
+ Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::GPR8RegClass);
Register ShiftReg = RI.createVirtualRegister(RC);
Register ShiftReg2 = RI.createVirtualRegister(RC);
Register ShiftAmtSrcReg = MI.getOperand(2).getReg();
Register DstReg = MI.getOperand(0).getReg();
// BB:
- // cpi N, 0
- // breq RemBB
- BuildMI(BB, dl, TII.get(AVR::CPIRdK)).addReg(ShiftAmtSrcReg).addImm(0);
- BuildMI(BB, dl, TII.get(AVR::BREQk)).addMBB(RemBB);
+ // rjmp CheckBB
+ BuildMI(BB, dl, TII.get(AVR::RJMPk)).addMBB(CheckBB);
// LoopBB:
- // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
- // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
// ShiftReg2 = shift ShiftReg
+ auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
+ if (HasRepeatedOperand)
+ ShiftMI.addReg(ShiftReg);
+
+ // CheckBB:
+ // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+ // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
+ // DestReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
// ShiftAmt2 = ShiftAmt - 1;
- BuildMI(LoopBB, dl, TII.get(AVR::PHI), ShiftReg)
+ // if (ShiftAmt2 >= 0) goto LoopBB;
+ BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftReg)
.addReg(SrcReg)
.addMBB(BB)
.addReg(ShiftReg2)
.addMBB(LoopBB);
- BuildMI(LoopBB, dl, TII.get(AVR::PHI), ShiftAmtReg)
+ BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftAmtReg)
.addReg(ShiftAmtSrcReg)
.addMBB(BB)
.addReg(ShiftAmtReg2)
.addMBB(LoopBB);
-
- auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
- if (HasRepeatedOperand)
- ShiftMI.addReg(ShiftReg);
-
- BuildMI(LoopBB, dl, TII.get(AVR::SUBIRdK), ShiftAmtReg2)
- .addReg(ShiftAmtReg)
- .addImm(1);
- BuildMI(LoopBB, dl, TII.get(AVR::BRNEk)).addMBB(LoopBB);
-
- // RemBB:
- // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
- BuildMI(*RemBB, RemBB->begin(), dl, TII.get(AVR::PHI), DstReg)
+ BuildMI(CheckBB, dl, TII.get(AVR::PHI), DstReg)
.addReg(SrcReg)
.addMBB(BB)
.addReg(ShiftReg2)
.addMBB(LoopBB);
+ BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2)
+ .addReg(ShiftAmtReg);
+ BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB);
+
MI.eraseFromParent(); // The pseudo instruction is gone now.
return RemBB;
}
define i8 @rol8(i8 %val, i8 %amt) {
; CHECK: andi r22, 7
- ; CHECK-NEXT: cpi r22, 0
- ; CHECK-NEXT: breq .LBB0_2
+ ; CHECK-NEXT: dec r22
+ ; CHECK-NEXT: brmi .LBB0_2
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: lsl r24
; CHECK-NEXT: adc r24, r1
- ; CHECK-NEXT: subi r22, 1
- ; CHECK-NEXT: brne .LBB0_1
+ ; CHECK-NEXT: dec r22
+ ; CHECK-NEXT: brpl .LBB0_1
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: ret
define i8 @ror8(i8 %val, i8 %amt) {
; CHECK: andi r22, 7
- ; CHECK-NEXT: cpi r22, 0
- ; CHECK-NEXT: breq .LBB1_2
+ ; CHECK-NEXT: dec r22
+ ; CHECK-NEXT: brmi .LBB1_2
; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: lsr r24
; CHECK-NEXT: ldi r0, 0
; CHECK-NEXT: ror r0
; CHECK-NEXT: or r24, r0
- ; CHECK-NEXT: subi r22, 1
- ; CHECK-NEXT: brne .LBB1_1
+ ; CHECK-NEXT: dec r22
+ ; CHECK-NEXT: brpl .LBB1_1
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ret
; RUN: llc < %s -march=avr | FileCheck %s
+; Optimize for speed.
+; CHECK-LABEL: shift_i8_i8_speed
+define i8 @shift_i8_i8_speed(i8 %a, i8 %b) {
+ ; Variable 8-bit left shift, default (speed) layout. The expected code tests
+ ; the count once before the loop (dec r22 + brmi skips straight to the ret
+ ; label) and then runs a bottom-tested loop: one lsl r24 per iteration,
+ ; followed by dec r22 + brpl back to the loop head.
+ ; NOTE(review): r22 presumably carries %b and r24 carries %a -- confirm
+ ; against the AVR calling convention.
+ ; CHECK: dec r22
+ ; CHECK-NEXT: brmi .LBB0_2
+ ; CHECK-NEXT: .LBB0_1:
+ ; CHECK-NEXT: lsl r24
+ ; CHECK-NEXT: dec r22
+ ; CHECK-NEXT: brpl .LBB0_1
+ ; CHECK-NEXT: .LBB0_2:
+ ; CHECK-NEXT: ret
+ %result = shl i8 %a, %b
+ ret i8 %result
+}
+
+; Optimize for size (producing slightly smaller code).
+; CHECK-LABEL: shift_i8_i8_size
+define i8 @shift_i8_i8_size(i8 %a, i8 %b) optsize {
+ ; Same shl as shift_i8_i8_speed, but marked optsize. The expected layout keeps
+ ; a single dec r22 + brmi test at the top of the loop and returns to it with
+ ; rjmp after each lsl r24, rather than matching a second dec/branch pair at
+ ; the bottom of the loop.
+ ; The directive in front of lsl is the plain (non-NEXT) form, so other output
+ ; may appear between the brmi and the lsl -- presumably an intermediate block
+ ; label; confirm against actual llc output.
+ ; CHECK: .LBB1_1:
+ ; CHECK-NEXT: dec r22
+ ; CHECK-NEXT: brmi .LBB1_3
+ ; CHECK: lsl r24
+ ; CHECK-NEXT: rjmp .LBB1_1
+ ; CHECK-NEXT: .LBB1_3:
+ ; CHECK-NEXT: ret
+ %result = shl i8 %a, %b
+ ret i8 %result
+}
+
+; CHECK-LABEL: shift_i16_i16
+define i16 @shift_i16_i16(i16 %a, i16 %b) {
+ ; Variable 16-bit left shift. The expected code has the same shape as the
+ ; 8-bit speed case (dec r22 + brmi before the loop, dec r22 + brpl at the
+ ; bottom), but each iteration shifts two bytes: lsl r24 followed by rol r25.
+ ; Only r22 is matched for the count even though %b is declared i16.
+ ; CHECK: dec r22
+ ; CHECK-NEXT: brmi .LBB2_2
+ ; CHECK-NEXT: .LBB2_1:
+ ; CHECK-NEXT: lsl r24
+ ; CHECK-NEXT: rol r25
+ ; CHECK-NEXT: dec r22
+ ; CHECK-NEXT: brpl .LBB2_1
+ ; CHECK-NEXT: .LBB2_2:
+ ; CHECK-NEXT: ret
+ %result = shl i16 %a, %b
+ ret i16 %result
+}
+
; CHECK-LABEL: shift_i64_i64
define i64 @shift_i64_i64(i64 %a, i64 %b) {
; CHECK: call __ashldi3