BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ZeroReg)
.addReg(STI.getZeroRegister());
+ // Do a shift modulo 6 or 7. This is a bit more complicated than most shifts
+ // and is hard to compose with the rest, so these are special cased.
+ // The basic idea is to shift one or two bits in the opposite direction and
+ // then move registers around to get the correct end result.
+ if (ShiftLeft && (ShiftAmt % 8) >= 6) {
+ // Left shift modulo 6 or 7.
+ // Strategy: shift the relevant registers one (or two) bits to the RIGHT,
+ // capturing each bit that falls out via the carry flag, then move whole
+ // registers to the left to cover the remaining multiple of 8.
+
+ // Create a slice of the registers we're going to modify, to ease working
+ // with them.
+ size_t ShiftRegsOffset = ShiftAmt / 8;
+ size_t ShiftRegsSize = Regs.size() - ShiftRegsOffset;
+ MutableArrayRef<std::pair<Register, int>> ShiftRegs =
+ Regs.slice(ShiftRegsOffset, ShiftRegsSize);
+
+ // Shift one to the right, keeping the least significant bit as the carry
+ // bit.
+ insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);
+
+ // Rotate the least significant bit from the carry bit into a new register
+ // (that starts out zero).
+ // (On AVR, 'ror' shifts the carry flag into bit 7 of the destination.)
+ Register LowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
+ BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg);
+
+ // Shift one more to the right if this is a modulo-6 shift.
+ if (ShiftAmt % 8 == 6) {
+ insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);
+ Register NewLowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
+ BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte);
+ LowByte = NewLowByte;
+ }
+
+ // Move all registers to the left, zeroing the bottom registers as needed.
+ // NOTE(review): the moves below assume Regs[0] is the most significant
+ // byte and higher indices are less significant — confirm with the caller.
+ // Reading ShiftRegs[I + 1] while writing Regs[I] is safe because the read
+ // index is always strictly greater than the write index.
+ for (size_t I = 0; I < Regs.size(); I++) {
+ int ShiftRegsIdx = I + 1;
+ if (ShiftRegsIdx < (int)ShiftRegs.size()) {
+ Regs[I] = ShiftRegs[ShiftRegsIdx];
+ } else if (ShiftRegsIdx == (int)ShiftRegs.size()) {
+ Regs[I] = std::pair(LowByte, 0);
+ } else {
+ Regs[I] = std::pair(ZeroReg, 0);
+ }
+ }
+
+ return;
+ }
+
+ // Right shift modulo 6 or 7.
+ if (!ShiftLeft && (ShiftAmt % 8) >= 6) {
+ // Create a view on the registers we're going to modify, to ease working
+ // with them.
+ size_t ShiftRegsSize = Regs.size() - (ShiftAmt / 8);
+ MutableArrayRef<std::pair<Register, int>> ShiftRegs =
+ Regs.slice(0, ShiftRegsSize);
+
+ // Shift one to the left.
+ insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);
+
+ // Sign or zero extend the most significant register into a new register.
+ // The HighByte is the byte that still has one (or two) bits from the
+ // original value. The ExtByte is purely a zero/sign extend byte (all bits
+ // are either 0 or 1).
+ Register HighByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
+ Register ExtByte = 0;
+ if (ArithmeticShift) {
+ // Sign-extend bit that was shifted out last.
+ // ('sbc Rd,Rd' yields 0x00 or 0xFF depending on the carry flag, so this
+ // replicates the shifted-out sign bit across the whole byte.)
+ BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), HighByte)
+ .addReg(HighByte, RegState::Undef)
+ .addReg(HighByte, RegState::Undef);
+ ExtByte = HighByte;
+ // The highest bit of the original value is the same as the zero-extend
+ // byte, so HighByte and ExtByte are the same.
+ } else {
+ // Use the zero register for zero extending.
+ ExtByte = ZeroReg;
+ // Rotate most significant bit into a new register (that starts out zero).
+ // ('adc Rd,Rd' with Rd == 0 moves the carry flag into bit 0.)
+ BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), HighByte)
+ .addReg(ExtByte)
+ .addReg(ExtByte);
+ }
+
+ // Shift one more to the left for modulo 6 shifts.
+ if (ShiftAmt % 8 == 6) {
+ insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);
+ // Shift the topmost bit into the HighByte.
+ Register NewExt = MRI.createVirtualRegister(&AVR::GPR8RegClass);
+ BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), NewExt)
+ .addReg(HighByte)
+ .addReg(HighByte);
+ HighByte = NewExt;
+ }
+
+ // Move all to the right, while sign or zero extending.
+ // A negative ShiftRegsIdx marks a result byte that holds no original data:
+ // -1 selects HighByte (the byte still carrying data bits), anything lower
+ // selects the pure sign/zero-extension byte ExtByte.
+ for (int I = Regs.size() - 1; I >= 0; I--) {
+ int ShiftRegsIdx = I - (Regs.size() - ShiftRegs.size()) - 1;
+ if (ShiftRegsIdx >= 0) {
+ Regs[I] = ShiftRegs[ShiftRegsIdx];
+ } else if (ShiftRegsIdx == -1) {
+ Regs[I] = std::pair(HighByte, 0);
+ } else {
+ Regs[I] = std::pair(ExtByte, 0);
+ }
+ }
+
+ return;
+ }
+
// For shift amounts of at least one register, simply rename the registers and
// zero the bottom registers.
while (ShiftLeft && ShiftAmt >= 8) {
ret i32 %res
}
+; 6 = 8 - 2: shift two to the right and move the registers around
+define i32 @shl_i32_6(i32 %a) {
+; CHECK-LABEL: shl_i32_6:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsr r25
+; CHECK-NEXT: ror r24
+; CHECK-NEXT: ror r23
+; CHECK-NEXT: ror r22
+; CHECK-NEXT: mov r18, r1
+; CHECK-NEXT: ror r18
+; CHECK-NEXT: lsr r25
+; CHECK-NEXT: ror r24
+; CHECK-NEXT: ror r23
+; CHECK-NEXT: ror r22
+; CHECK-NEXT: ror r18
+; CHECK-NEXT: mov r25, r24
+; CHECK-NEXT: mov r24, r23
+; CHECK-NEXT: mov r19, r22
+; CHECK-NEXT: movw r22, r18
+; CHECK-NEXT: ret
+ %res = shl i32 %a, 6
+ ret i32 %res
+}
+
+
+; 7 = 8 - 1: shift one to the right and move registers around
+define i32 @shl_i32_7(i32 %a) {
+; CHECK-LABEL: shl_i32_7:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsr r25
+; CHECK-NEXT: ror r24
+; CHECK-NEXT: ror r23
+; CHECK-NEXT: ror r22
+; CHECK-NEXT: mov r18, r1
+; CHECK-NEXT: ror r18
+; CHECK-NEXT: mov r25, r24
+; CHECK-NEXT: mov r24, r23
+; CHECK-NEXT: mov r19, r22
+; CHECK-NEXT: movw r22, r18
+; CHECK-NEXT: ret
+ %res = shl i32 %a, 7
+ ret i32 %res
+}
+
define i32 @shl_i32_8(i32 %a) {
; CHECK-LABEL: shl_i32_8:
; CHECK: ; %bb.0:
ret i32 %res
}
+; 15 = 16 - 1: shift one to the right and move the registers around
+define i32 @shl_i32_15(i32 %a) {
+; CHECK-LABEL: shl_i32_15:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: movw r18, r22
+; CHECK-NEXT: lsr r24
+; CHECK-NEXT: ror r19
+; CHECK-NEXT: ror r18
+; CHECK-NEXT: mov r23, r1
+; CHECK-NEXT: ror r23
+; CHECK-NEXT: mov r22, r1
+; CHECK-NEXT: movw r24, r18
+; CHECK-NEXT: ret
+ %res = shl i32 %a, 15
+ ret i32 %res
+}
+
; This is a special case: this shift is performed directly inside SelectionDAG
; instead of as a custom lowering like the other shift operations.
define i32 @shl_i32_16(i32 %a) {
ret i32 %res
}
+; 31 = 32 - 1: move the rightmost bit to the leftmost bit and clear the rest
+define i32 @shl_i32_31(i32 %a) {
+; CHECK-LABEL: shl_i32_31:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsr r22
+; CHECK-NEXT: mov r25, r1
+; CHECK-NEXT: ror r25
+; CHECK-NEXT: mov r24, r1
+; CHECK-NEXT: mov r23, r1
+; CHECK-NEXT: mov r22, r1
+; CHECK-NEXT: ret
+ %res = shl i32 %a, 31
+ ret i32 %res
+}
+
define i32 @lshr_i32_1(i32 %a) {
; CHECK-LABEL: lshr_i32_1:
; CHECK: ; %bb.0:
ret i32 %res
}
+; 6 = 8 - 2: shift two to the left and move the registers around
+define i32 @lshr_i32_6(i32 %a) {
+; CHECK-LABEL: lshr_i32_6:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsl r22
+; CHECK-NEXT: rol r23
+; CHECK-NEXT: rol r24
+; CHECK-NEXT: rol r25
+; CHECK-NEXT: mov r19, r1
+; CHECK-NEXT: rol r19
+; CHECK-NEXT: lsl r22
+; CHECK-NEXT: rol r23
+; CHECK-NEXT: rol r24
+; CHECK-NEXT: rol r25
+; CHECK-NEXT: rol r19
+; CHECK-NEXT: mov r18, r25
+; CHECK-NEXT: mov r25, r24
+; CHECK-NEXT: mov r24, r23
+; CHECK-NEXT: movw r22, r24
+; CHECK-NEXT: movw r24, r18
+; CHECK-NEXT: ret
+ %res = lshr i32 %a, 6
+ ret i32 %res
+}
+
+; 7 = 8 - 1: shift one to the left and move the registers around
+define i32 @lshr_i32_7(i32 %a) {
+; CHECK-LABEL: lshr_i32_7:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsl r22
+; CHECK-NEXT: rol r23
+; CHECK-NEXT: rol r24
+; CHECK-NEXT: rol r25
+; CHECK-NEXT: mov r19, r1
+; CHECK-NEXT: rol r19
+; CHECK-NEXT: mov r18, r25
+; CHECK-NEXT: mov r25, r24
+; CHECK-NEXT: mov r24, r23
+; CHECK-NEXT: movw r22, r24
+; CHECK-NEXT: movw r24, r18
+; CHECK-NEXT: ret
+ %res = lshr i32 %a, 7
+ ret i32 %res
+}
+
define i32 @lshr_i32_8(i32 %a) {
; CHECK-LABEL: lshr_i32_8:
; CHECK: ; %bb.0:
ret i32 %res
}
+; 31 = 32 - 1: move the leftmost bit to the rightmost bit and clear the rest
+define i32 @lshr_i32_31(i32 %a) {
+; CHECK-LABEL: lshr_i32_31:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsl r25
+; CHECK-NEXT: mov r22, r1
+; CHECK-NEXT: rol r22
+; CHECK-NEXT: mov r25, r1
+; CHECK-NEXT: mov r24, r1
+; CHECK-NEXT: mov r23, r1
+; CHECK-NEXT: ret
+ %res = lshr i32 %a, 31
+ ret i32 %res
+}
+
define i32 @ashr_i32_1(i32 %a) {
; CHECK-LABEL: ashr_i32_1:
; CHECK: ; %bb.0:
ret i32 %res
}
+; 7 = 8 - 1: shift one to the left and sign-extend the result with sbc
+define i32 @ashr_i32_7(i32 %a) {
+; CHECK-LABEL: ashr_i32_7:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsl r22
+; CHECK-NEXT: rol r23
+; CHECK-NEXT: rol r24
+; CHECK-NEXT: rol r25
+; CHECK-NEXT: sbc r19, r19
+; CHECK-NEXT: mov r18, r25
+; CHECK-NEXT: mov r25, r24
+; CHECK-NEXT: mov r24, r23
+; CHECK-NEXT: movw r22, r24
+; CHECK-NEXT: movw r24, r18
+; CHECK-NEXT: ret
+ %res = ashr i32 %a, 7
+ ret i32 %res
+}
+
; TODO: this could be optimized to 4 movs, instead of 6.
define i32 @ashr_i32_8(i32 %a) {
; CHECK-LABEL: ashr_i32_8:
%res = ashr i32 %a, 17
ret i32 %res
}
+
+; 22 = 24 - 2: shift two to the left while sign extending
+define i32 @ashr_i32_22(i32 %a) {
+; CHECK-LABEL: ashr_i32_22:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsl r24
+; CHECK-NEXT: rol r25
+; CHECK-NEXT: sbc r19, r19
+; CHECK-NEXT: lsl r24
+; CHECK-NEXT: rol r25
+; CHECK-NEXT: mov r23, r19
+; CHECK-NEXT: rol r23
+; CHECK-NEXT: mov r18, r19
+; CHECK-NEXT: mov r22, r25
+; CHECK-NEXT: movw r24, r18
+; CHECK-NEXT: ret
+ %res = ashr i32 %a, 22
+ ret i32 %res
+}
+
+; 23 = 24 - 1: shift one to the left while sign extending
+define i32 @ashr_i32_23(i32 %a) {
+; CHECK-LABEL: ashr_i32_23:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsl r24
+; CHECK-NEXT: rol r25
+; CHECK-NEXT: sbc r19, r19
+; CHECK-NEXT: mov r18, r19
+; CHECK-NEXT: mov r23, r19
+; CHECK-NEXT: mov r22, r25
+; CHECK-NEXT: movw r24, r18
+; CHECK-NEXT: ret
+ %res = ashr i32 %a, 23
+ ret i32 %res
+}
+
+; 30 = 32 - 2: shift two to the left while sign extending
+define i32 @ashr_i32_30(i32 %a) {
+; CHECK-LABEL: ashr_i32_30:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsl r25
+; CHECK-NEXT: sbc r19, r19
+; CHECK-NEXT: lsl r25
+; CHECK-NEXT: mov r22, r19
+; CHECK-NEXT: rol r22
+; CHECK-NEXT: mov r18, r19
+; CHECK-NEXT: mov r23, r19
+; CHECK-NEXT: movw r24, r18
+; CHECK-NEXT: ret
+ %res = ashr i32 %a, 30
+ ret i32 %res
+}
+
+; 31 = 32 - 1: broadcast the sign bit to every byte
+define i32 @ashr_i32_31(i32 %a) {
+; CHECK-LABEL: ashr_i32_31:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsl r25
+; CHECK-NEXT: sbc r23, r23
+; CHECK-NEXT: mov r22, r23
+; CHECK-NEXT: movw r24, r22
+; CHECK-NEXT: ret
+ %res = ashr i32 %a, 31
+ ret i32 %res
+}