return false;
}
+/// Map the memory operand of \p N to a program memory bank index.
+///
+/// \returns -1 when the access has no IR value or is not a program memory
+/// access, 0 for ordinary program memory, and 1 to 5 for extended program
+/// memory (the distance of the address space from ProgramMemory).
+inline int getProgramMemoryBank(MemSDNode const *N) {
+  const auto *MemVal = N->getMemOperand()->getValue();
+  if (!MemVal || !isProgramMemoryAddress(MemVal))
+    return -1;
+
+  // Program memory address spaces are expected to be contiguous, starting at
+  // ProgramMemory and ending at ProgramMemory5.
+  const AddressSpace Space = getAddressSpace(MemVal);
+  assert(Space >= ProgramMemory && Space <= ProgramMemory5);
+  return static_cast<int>(Space - ProgramMemory);
+}
+
} // end of namespace AVR
} // end namespace llvm
bool expandLSLW12Rd(Block &MBB, BlockIt MBBI);
bool expandLSRW12Rd(Block &MBB, BlockIt MBBI);
+ // Common implementation of LPMWRdZ and ELPMWRdZ.
+ bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt);
+
/// Scavenges a free GPR8 register for use.
Register scavengeGPR8(MachineInstr &MI);
};
return true;
}
-template <>
-bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
Register TmpReg = 0; // 0 for no temporary register
Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
- unsigned OpLo = AVR::LPMRdZPi;
- unsigned OpHi = AVR::LPMRdZ;
+ unsigned OpLo = IsExt ? AVR::ELPMRdZPi : AVR::LPMRdZPi;
+ unsigned OpHi = IsExt ? AVR::ELPMRdZ : AVR::LPMRdZ;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+ // Set the I/O register RAMPZ for ELPM.
+ if (IsExt) {
+ const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+ Register Bank = MI.getOperand(2).getReg();
+ // out RAMPZ, rtmp
+ buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(Bank);
+ }
+
// Use a temporary register if src and dst registers are the same.
if (DstReg == SrcReg)
TmpReg = scavengeGPR8(MI);
}
+// Expand the 16-bit LPM pseudo through the shared LPMW/ELPMW helper; IsExt is
+// false, so the helper selects the LPM opcodes and does not write RAMPZ.
template <>
+bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
+ return expandLPMWELPMW(MBB, MBBI, false);
+}
+
+// Expand the 16-bit ELPM pseudo through the shared LPMW/ELPMW helper; IsExt is
+// true, so the helper writes the bank operand to RAMPZ and selects the ELPM
+// opcodes.
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMWRdZ>(Block &MBB, BlockIt MBBI) {
+ return expandLPMWELPMW(MBB, MBBI, true);
+}
+
+/// Expand the 8-bit ELPM pseudo into "out RAMPZ, <bank>" followed by
+/// "elpm <dst>, Z". The memory operands of the pseudo are attached to the
+/// ELPM instruction, and the pseudo itself is erased.
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMBRdZ>(Block &MBB, BlockIt MBBI) {
+  MachineInstr &Pseudo = *MBBI;
+  const auto &PtrOp = Pseudo.getOperand(1);
+
+  // Operand layout of the pseudo: (dst, Z pointer, bank register).
+  const Register Dst = Pseudo.getOperand(0).getReg();
+  const Register Ptr = PtrOp.getReg();
+  const Register Bank = Pseudo.getOperand(2).getReg();
+  const unsigned PtrFlags = getKillRegState(PtrOp.isKill());
+  const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+
+  // Select the program memory bank first: out RAMPZ, <bank register>.
+  auto SetBank = buildMI(MBB, MBBI, AVR::OUTARr);
+  SetBank.addImm(STI.getIORegRAMPZ());
+  SetBank.addReg(Bank);
+
+  // Then load the byte through Z, keeping the pseudo's memory operands.
+  auto Load = buildMI(MBB, MBBI, AVR::ELPMRdZ);
+  Load.addReg(Dst, RegState::Define);
+  Load.addReg(Ptr, PtrFlags);
+  Load.setMemRefs(Pseudo.memoperands());
+
+  Pseudo.eraseFromParent();
+  return true;
+}
+
+// Placeholder: expansion of the post-increment 16-bit LPM pseudo is not
+// implemented, so reaching this function is reported as unreachable.
+template <>
bool AVRExpandPseudo::expand<AVR::LPMWRdZPi>(Block &MBB, BlockIt MBBI) {
- llvm_unreachable("wide LPMPi is unimplemented");
+ llvm_unreachable("16-bit LPMPi is unimplemented");
+}
+
+// Placeholder: expansion of the post-increment 8-bit ELPM pseudo is not
+// implemented, so reaching this function is reported as unreachable.
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMBRdZPi>(Block &MBB, BlockIt MBBI) {
+ llvm_unreachable("byte ELPMPi is unimplemented");
+}
+
+// Placeholder: expansion of the post-increment 16-bit ELPM pseudo is not
+// implemented, so reaching this function is reported as unreachable.
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMWRdZPi>(Block &MBB, BlockIt MBBI) {
+ llvm_unreachable("16-bit ELPMPi is unimplemented");
}
template <typename Func>
EXPAND(AVR::LDDWRdPtrQ);
EXPAND(AVR::LPMWRdZ);
EXPAND(AVR::LPMWRdZPi);
+ EXPAND(AVR::ELPMBRdZ);
+ EXPAND(AVR::ELPMWRdZ);
+ EXPAND(AVR::ELPMBRdZPi);
+ EXPAND(AVR::ELPMWRdZPi);
EXPAND(AVR::AtomicLoad8);
EXPAND(AVR::AtomicLoad16);
EXPAND(AVR::AtomicStore8);
bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Disp);
bool selectIndexedLoad(SDNode *N);
- unsigned selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT);
+ unsigned selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT, int Bank);
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintCode,
std::vector<SDValue> &OutOps) override;
return true;
}
-unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD,
- MVT VT) {
- ISD::MemIndexedMode AM = LD->getAddressingMode();
-
+unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT,
+ int Bank) {
// Progmem indexed loads only work in POSTINC mode.
- if (LD->getExtensionType() != ISD::NON_EXTLOAD || AM != ISD::POST_INC) {
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
+ LD->getAddressingMode() != ISD::POST_INC)
return 0;
- }
+
+ // Feature ELPM is needed for loading from extended program memory.
+ assert((Bank == 0 || Subtarget->hasELPM()) &&
+ "cannot load from extended program memory on this mcu");
unsigned Opcode = 0;
int Offs = cast<ConstantSDNode>(LD->getOffset())->getSExtValue();
switch (VT.SimpleTy) {
- case MVT::i8: {
- if (Offs != 1) {
- return 0;
- }
- Opcode = AVR::LPMRdZPi;
+ case MVT::i8:
+ if (Offs == 1)
+ Opcode = Bank > 0 ? AVR::ELPMBRdZPi : AVR::LPMRdZPi;
break;
- }
- case MVT::i16: {
- if (Offs != 2) {
- return 0;
- }
- Opcode = AVR::LPMWRdZPi;
+ case MVT::i16:
+ if (Offs == 2)
+ Opcode = Bank > 0 ? AVR::ELPMWRdZPi : AVR::LPMWRdZPi;
break;
- }
default:
- return 0;
+ break;
}
return Opcode;
return selectIndexedLoad(N);
}
- assert(Subtarget->hasLPM() && "cannot load from program memory on this mcu");
+ if (!Subtarget->hasLPM())
+ report_fatal_error("cannot load from program memory on this mcu");
+
+ int ProgMemBank = AVR::getProgramMemoryBank(LD);
+ if (ProgMemBank < 0 || ProgMemBank > 5)
+ report_fatal_error("unexpected program memory bank");
// This is a flash memory load, move the pointer into R31R30 and emit
// the lpm instruction.
Ptr = CurDAG->getCopyFromReg(Chain, DL, AVR::R31R30, MVT::i16,
Chain.getValue(1));
- SDValue RegZ = CurDAG->getRegister(AVR::R31R30, MVT::i16);
-
// Check if the opcode can be converted into an indexed load.
- if (unsigned LPMOpc = selectIndexedProgMemLoad(LD, VT)) {
+ if (unsigned LPMOpc = selectIndexedProgMemLoad(LD, VT, ProgMemBank)) {
// It is legal to fold the load into an indexed load.
- ResNode =
- CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr, RegZ);
- ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
+ if (ProgMemBank == 0) {
+ ResNode =
+ CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr);
+ } else {
+ // Do not combine the LDI instruction into the ELPM pseudo instruction,
+ // since it may be reused by other ELPM pseudo instructions.
+ SDValue NC = CurDAG->getTargetConstant(ProgMemBank, DL, MVT::i8);
+ auto *NP = CurDAG->getMachineNode(AVR::LDIRdK, DL, MVT::i8, NC);
+ ResNode = CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other,
+ Ptr, SDValue(NP, 0));
+ }
} else {
// Selecting an indexed load is not legal, fallback to a normal load.
switch (VT.SimpleTy) {
case MVT::i8:
- ResNode = CurDAG->getMachineNode(AVR::LPMRdZ, DL, MVT::i8, MVT::Other,
- Ptr, RegZ);
+ if (ProgMemBank == 0) {
+ ResNode =
+ CurDAG->getMachineNode(AVR::LPMRdZ, DL, MVT::i8, MVT::Other, Ptr);
+ } else {
+ // Do not combine the LDI instruction into the ELPM pseudo instruction,
+ // since it may be reused by other ELPM pseudo instructions.
+ SDValue NC = CurDAG->getTargetConstant(ProgMemBank, DL, MVT::i8);
+ auto *NP = CurDAG->getMachineNode(AVR::LDIRdK, DL, MVT::i8, NC);
+ ResNode = CurDAG->getMachineNode(AVR::ELPMBRdZ, DL, MVT::i8, MVT::Other,
+ Ptr, SDValue(NP, 0));
+ }
break;
case MVT::i16:
- ResNode = CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16, MVT::Other,
- Ptr, RegZ);
- ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
+ if (ProgMemBank == 0) {
+ ResNode =
+ CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16, MVT::Other, Ptr);
+ } else {
+ // Do not combine the LDI instruction into the ELPM pseudo instruction,
+ // since LDI requires the destination register in range R16~R31.
+ SDValue NC = CurDAG->getTargetConstant(ProgMemBank, DL, MVT::i8);
+ auto *NP = CurDAG->getMachineNode(AVR::LDIRdK, DL, MVT::i8, NC);
+ ResNode = CurDAG->getMachineNode(AVR::ELPMWRdZ, DL, MVT::i16,
+ MVT::Other, Ptr, SDValue(NP, 0));
+ }
break;
default:
llvm_unreachable("Unsupported VT!");
: F16<0b1001010111011000, (outs), (ins), "elpm", []>,
Requires<[HasELPM]>;
- def ELPMRdZ : FLPMX<1, 0,
- (outs GPR8
- : $dst),
- (ins ZREG
- : $z),
+ def ELPMRdZ : FLPMX<1, 0, (outs GPR8:$dst), (ins ZREG:$z),
"elpm\t$dst, $z", []>,
Requires<[HasELPMX]>;
- let Defs = [R31R30] in def ELPMRdZPi : FLPMX<1, 1,
- (outs GPR8
- : $dst),
- (ins ZREG
- : $z),
- "elpm\t$dst, $z+", []>,
- Requires<[HasELPMX]>;
+ let Defs = [R31R30] in {
+ def ELPMRdZPi : FLPMX<1, 1, (outs GPR8:$dst), (ins ZREG:$z),
+ "elpm\t$dst, $z+", []>,
+ Requires<[HasELPMX]>;
+ }
+
+ // These pseudos are a combination of the OUT and ELPM instructions.
+ // Operand $p is a register holding the program memory bank; the pseudo
+ // expansion writes it to RAMPZ with OUT before issuing ELPM through $z.
+ // NOTE(review): the "Pi" (post-increment) variants currently end in
+ // llvm_unreachable in the pseudo expansion pass.
+ // NOTE(review): instruction selection asserts hasELPM() while these pseudos
+ // require HasELPMX -- confirm devices with ELPM but without ELPMX are handled.
+ let Defs = [R31R30], hasSideEffects = 1 in {
+ def ELPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p),
+ "elpmb\t$dst, $z, $p", []>,
+ Requires<[HasELPMX]>;
+
+ def ELPMWRdZ : Pseudo<(outs DREGS:$dst), (ins ZREG:$z, LD8:$p),
+ "elpmw\t$dst, $z, $p", []>,
+ Requires<[HasELPMX]>;
+
+ def ELPMBRdZPi : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p),
+ "elpmb\t$dst, $z+, $p", []>,
+ Requires<[HasELPMX]>;
+
+ def ELPMWRdZPi : Pseudo<(outs DREGS:$dst), (ins ZREG:$z, LD8:$p),
+ "elpmw\t$dst, $z+, $p", []>,
+ Requires<[HasELPMX]>;
+ }
}
// Store program memory operations.
return ELFArch;
}
+ /// Get the I/O address of the RAMPZ register, as used for the address
+ /// operand of the OUT instruction that selects the ELPM bank.
+ int getIORegRAMPZ() const { return 0x3b; }
+
private:
/// The ELF e_flags architecture.
unsigned ELFArch;
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=avr --mcpu=atmega2560 -verify-machineinstrs | FileCheck %s
+
+@arr0 = addrspace(1) constant [4 x i16] [i16 123, i16 24, i16 56, i16 37], align 1
+@arr1 = addrspace(2) constant [4 x i16] [i16 123, i16 34, i16 46, i16 27], align 1
+@arr2 = addrspace(3) constant [4 x i16] [i16 123, i16 23, i16 45, i16 17], align 1
+
+; Both 16-bit loads read addrspace(1): each is expected to become an
+; "lpm Z+" / "lpm Z" pair with no write to RAMPZ.
+define i16 @foo0(i16 %a, i16 %b) {
+; CHECK-LABEL: foo0:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: lsl r22
+; CHECK-NEXT: rol r23
+; CHECK-NEXT: subi r22, -lo8(arr0)
+; CHECK-NEXT: sbci r23, -hi8(arr0)
+; CHECK-NEXT: movw r30, r22
+; CHECK-NEXT: lpm r18, Z+
+; CHECK-NEXT: lpm r19, Z
+; CHECK-NEXT: lsl r24
+; CHECK-NEXT: rol r25
+; CHECK-NEXT: subi r24, -lo8(arr0)
+; CHECK-NEXT: sbci r25, -hi8(arr0)
+; CHECK-NEXT: movw r30, r24
+; CHECK-NEXT: lpm r24, Z+
+; CHECK-NEXT: lpm r25, Z
+; CHECK-NEXT: sub r24, r18
+; CHECK-NEXT: sbc r25, r19
+; CHECK-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %a
+ %0 = load i16, i16 addrspace(1)* %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %b
+ %1 = load i16, i16 addrspace(1)* %arrayidx1, align 1
+ %sub = sub nsw i16 %0, %1
+ ret i16 %sub
+}
+
+; Mixes a 16-bit load from addrspace(1) (plain lpm) with one from
+; addrspace(2), which is expected to write 1 to I/O register 59 (RAMPZ)
+; before the elpm pair.
+define i16 @foo1(i16 %a, i16 %b) {
+; CHECK-LABEL: foo1:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: lsl r22
+; CHECK-NEXT: rol r23
+; CHECK-NEXT: subi r22, -lo8(arr1)
+; CHECK-NEXT: sbci r23, -hi8(arr1)
+; CHECK-NEXT: movw r30, r22
+; CHECK-NEXT: ldi r18, 1
+; CHECK-NEXT: out 59, r18
+; CHECK-NEXT: elpm r18, Z+
+; CHECK-NEXT: elpm r19, Z
+; CHECK-NEXT: lsl r24
+; CHECK-NEXT: rol r25
+; CHECK-NEXT: subi r24, -lo8(arr0)
+; CHECK-NEXT: sbci r25, -hi8(arr0)
+; CHECK-NEXT: movw r30, r24
+; CHECK-NEXT: lpm r24, Z+
+; CHECK-NEXT: lpm r25, Z
+; CHECK-NEXT: sub r24, r18
+; CHECK-NEXT: sbc r25, r19
+; CHECK-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %a
+ %0 = load i16, i16 addrspace(1)* %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(2)* @arr1, i16 0, i16 %b
+ %1 = load i16, i16 addrspace(2)* %arrayidx1, align 1
+ %sub = sub nsw i16 %0, %1
+ ret i16 %sub
+}
+
+; Mixes a 16-bit load from addrspace(3), which is expected to write 2 to
+; I/O register 59 (RAMPZ) before the elpm pair, with a plain lpm load from
+; addrspace(1).
+define i16 @foo2(i16 %a, i16 %b) {
+; CHECK-LABEL: foo2:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: lsl r24
+; CHECK-NEXT: rol r25
+; CHECK-NEXT: subi r24, -lo8(arr2)
+; CHECK-NEXT: sbci r25, -hi8(arr2)
+; CHECK-NEXT: movw r30, r24
+; CHECK-NEXT: ldi r24, 2
+; CHECK-NEXT: out 59, r24
+; CHECK-NEXT: elpm r24, Z+
+; CHECK-NEXT: elpm r25, Z
+; CHECK-NEXT: lsl r22
+; CHECK-NEXT: rol r23
+; CHECK-NEXT: subi r22, -lo8(arr0)
+; CHECK-NEXT: sbci r23, -hi8(arr0)
+; CHECK-NEXT: movw r30, r22
+; CHECK-NEXT: lpm r18, Z+
+; CHECK-NEXT: lpm r19, Z
+; CHECK-NEXT: sub r24, r18
+; CHECK-NEXT: sbc r25, r19
+; CHECK-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(3)* @arr2, i16 0, i16 %a
+ %0 = load i16, i16 addrspace(3)* %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %b
+ %1 = load i16, i16 addrspace(1)* %arrayidx1, align 1
+ %sub = sub nsw i16 %0, %1
+ ret i16 %sub
+}
+
+; Two 16-bit loads from different extended address spaces (2 and 3): each
+; elpm pair is expected to be preceded by its own ldi/out 59 sequence with
+; the matching bank value (1 and 2).
+define i16 @foo3(i16 %a, i16 %b) {
+; CHECK-LABEL: foo3:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: lsl r22
+; CHECK-NEXT: rol r23
+; CHECK-NEXT: subi r22, -lo8(arr1)
+; CHECK-NEXT: sbci r23, -hi8(arr1)
+; CHECK-NEXT: movw r30, r22
+; CHECK-NEXT: ldi r18, 1
+; CHECK-NEXT: out 59, r18
+; CHECK-NEXT: elpm r18, Z+
+; CHECK-NEXT: elpm r19, Z
+; CHECK-NEXT: lsl r24
+; CHECK-NEXT: rol r25
+; CHECK-NEXT: subi r24, -lo8(arr2)
+; CHECK-NEXT: sbci r25, -hi8(arr2)
+; CHECK-NEXT: movw r30, r24
+; CHECK-NEXT: ldi r24, 2
+; CHECK-NEXT: out 59, r24
+; CHECK-NEXT: elpm r24, Z+
+; CHECK-NEXT: elpm r25, Z
+; CHECK-NEXT: sub r24, r18
+; CHECK-NEXT: sbc r25, r19
+; CHECK-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(3)* @arr2, i16 0, i16 %a
+ %0 = load i16, i16 addrspace(3)* %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(2)* @arr1, i16 0, i16 %b
+ %1 = load i16, i16 addrspace(2)* %arrayidx1, align 1
+ %sub = sub nsw i16 %0, %1
+ ret i16 %sub
+}
+
+@arrb1 = addrspace(1) constant [4 x i8] c"{\188%", align 1
+@arrb3 = addrspace(3) constant [4 x i8] c"{\22.\1B", align 1
+@arrb5 = addrspace(5) constant [4 x i8] c"{\17-\11", align 1
+
+; Both 8-bit loads read addrspace(1): each is expected to become a single
+; "lpm Rd, Z" with no write to RAMPZ.
+define signext i8 @foob0(i16 %a, i16 %b) {
+; CHECK-LABEL: foob0:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: subi r22, -lo8(arrb1)
+; CHECK-NEXT: sbci r23, -hi8(arrb1)
+; CHECK-NEXT: movw r30, r22
+; CHECK-NEXT: lpm r18, Z
+; CHECK-NEXT: subi r24, -lo8(arrb1)
+; CHECK-NEXT: sbci r25, -hi8(arrb1)
+; CHECK-NEXT: movw r30, r24
+; CHECK-NEXT: lpm r24, Z
+; CHECK-NEXT: sub r24, r18
+; CHECK-NEXT: mov r25, r24
+; CHECK-NEXT: lsl r25
+; CHECK-NEXT: sbc r25, r25
+; CHECK-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %a
+ %0 = load i8, i8 addrspace(1)* %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %b
+ %1 = load i8, i8 addrspace(1)* %arrayidx1, align 1
+ %sub = sub i8 %0, %1
+ ret i8 %sub
+}
+
+; Mixes an 8-bit load from addrspace(3), which is expected to write 2 to
+; I/O register 59 (RAMPZ) before elpm, with a plain lpm load from
+; addrspace(1).
+define signext i8 @foob1(i16 %a, i16 %b) {
+; CHECK-LABEL: foob1:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: subi r22, -lo8(arrb3)
+; CHECK-NEXT: sbci r23, -hi8(arrb3)
+; CHECK-NEXT: movw r30, r22
+; CHECK-NEXT: ldi r18, 2
+; CHECK-NEXT: out 59, r18
+; CHECK-NEXT: elpm r18, Z
+; CHECK-NEXT: subi r24, -lo8(arrb1)
+; CHECK-NEXT: sbci r25, -hi8(arrb1)
+; CHECK-NEXT: movw r30, r24
+; CHECK-NEXT: lpm r24, Z
+; CHECK-NEXT: sub r24, r18
+; CHECK-NEXT: mov r25, r24
+; CHECK-NEXT: lsl r25
+; CHECK-NEXT: sbc r25, r25
+; CHECK-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %a
+ %0 = load i8, i8 addrspace(1)* %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(3)* @arrb3, i16 0, i16 %b
+ %1 = load i8, i8 addrspace(3)* %arrayidx1, align 1
+ %sub = sub i8 %0, %1
+ ret i8 %sub
+}
+
+; Mixes an 8-bit load from addrspace(5), which is expected to write 4 to
+; I/O register 59 (RAMPZ) before elpm, with a plain lpm load from
+; addrspace(1).
+define signext i8 @foob2(i16 %a, i16 %b) {
+; CHECK-LABEL: foob2:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: subi r24, -lo8(arrb5)
+; CHECK-NEXT: sbci r25, -hi8(arrb5)
+; CHECK-NEXT: movw r30, r24
+; CHECK-NEXT: ldi r24, 4
+; CHECK-NEXT: out 59, r24
+; CHECK-NEXT: elpm r24, Z
+; CHECK-NEXT: subi r22, -lo8(arrb1)
+; CHECK-NEXT: sbci r23, -hi8(arrb1)
+; CHECK-NEXT: movw r30, r22
+; CHECK-NEXT: lpm r25, Z
+; CHECK-NEXT: sub r24, r25
+; CHECK-NEXT: mov r25, r24
+; CHECK-NEXT: lsl r25
+; CHECK-NEXT: sbc r25, r25
+; CHECK-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* @arrb5, i16 0, i16 %a
+ %0 = load i8, i8 addrspace(5)* %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %b
+ %1 = load i8, i8 addrspace(1)* %arrayidx1, align 1
+ %sub = sub i8 %0, %1
+ ret i8 %sub
+}
+
+; Two 8-bit loads from different extended address spaces (3 and 5): each
+; elpm is expected to be preceded by its own ldi/out 59 sequence with the
+; matching bank value (2 and 4).
+define signext i8 @foob3(i16 %a, i16 %b) {
+; CHECK-LABEL: foob3:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: subi r22, -lo8(arrb5)
+; CHECK-NEXT: sbci r23, -hi8(arrb5)
+; CHECK-NEXT: movw r30, r22
+; CHECK-NEXT: ldi r18, 4
+; CHECK-NEXT: out 59, r18
+; CHECK-NEXT: elpm r18, Z
+; CHECK-NEXT: subi r24, -lo8(arrb3)
+; CHECK-NEXT: sbci r25, -hi8(arrb3)
+; CHECK-NEXT: movw r30, r24
+; CHECK-NEXT: ldi r24, 2
+; CHECK-NEXT: out 59, r24
+; CHECK-NEXT: elpm r24, Z
+; CHECK-NEXT: sub r24, r18
+; CHECK-NEXT: mov r25, r24
+; CHECK-NEXT: lsl r25
+; CHECK-NEXT: sbc r25, r25
+; CHECK-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(3)* @arrb3, i16 0, i16 %a
+ %0 = load i8, i8 addrspace(3)* %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* @arrb5, i16 0, i16 %b
+ %1 = load i8, i8 addrspace(5)* %arrayidx1, align 1
+ %sub = sub i8 %0, %1
+ ret i8 %sub
+}
+
+; Two 8-bit loads from the same extended address space (3): the bank value
+; is expected to be materialized once ("ldi r18, 2") and reused by both
+; "out 59, r18" instructions.
+define signext i8 @foob4(i16 %a, i16 %b) {
+; CHECK-LABEL: foob4:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: subi r22, -lo8(arrb3)
+; CHECK-NEXT: sbci r23, -hi8(arrb3)
+; CHECK-NEXT: movw r30, r22
+; CHECK-NEXT: ldi r18, 2
+; CHECK-NEXT: out 59, r18
+; CHECK-NEXT: elpm r19, Z
+; CHECK-NEXT: subi r24, -lo8(arrb3)
+; CHECK-NEXT: sbci r25, -hi8(arrb3)
+; CHECK-NEXT: movw r30, r24
+; CHECK-NEXT: out 59, r18
+; CHECK-NEXT: elpm r24, Z
+; CHECK-NEXT: sub r24, r19
+; CHECK-NEXT: mov r25, r24
+; CHECK-NEXT: lsl r25
+; CHECK-NEXT: sbc r25, r25
+; CHECK-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(3)* @arrb3, i16 0, i16 %a
+ %0 = load i8, i8 addrspace(3)* %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds [4 x i8], [4 x i8] addrspace(3)* @arrb3, i16 0, i16 %b
+ %1 = load i8, i8 addrspace(3)* %arrayidx1, align 1
+ %sub = sub i8 %0, %1
+ ret i8 %sub
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=avr --mcpu=atmega328 -O0 -verify-machineinstrs \
+; RUN: | FileCheck -check-prefix=CHECK-O0 %s
+; RUN: llc < %s -mtriple=avr --mcpu=atmega328 -O3 -verify-machineinstrs \
+; RUN: | FileCheck -check-prefix=CHECK-O3 %s
+
+@arr0 = addrspace(1) constant [4 x i16] [i16 123, i16 234, i16 456, i16 67], align 1
+@arr1 = addrspace(1) constant [4 x i8] c"ABCD", align 1
+
+; 16-bit load from addrspace(1) on atmega328, with the index spilled through
+; an alloca: the load is expected to become an "lpm Z+" / "lpm Z" pair under
+; both the -O0 and the -O3 RUN lines.
+define i16 @foo0(i16 %a) addrspace(1) {
+; CHECK-O0-LABEL: foo0:
+; CHECK-O0: ; %bb.0: ; %entry
+; CHECK-O0-NEXT: push r28
+; CHECK-O0-NEXT: push r29
+; CHECK-O0-NEXT: in r28, 61
+; CHECK-O0-NEXT: in r29, 62
+; CHECK-O0-NEXT: sbiw r28, 2
+; CHECK-O0-NEXT: in r0, 63
+; CHECK-O0-NEXT: cli
+; CHECK-O0-NEXT: out 62, r29
+; CHECK-O0-NEXT: out 63, r0
+; CHECK-O0-NEXT: out 61, r28
+; CHECK-O0-NEXT: std Y+1, r24
+; CHECK-O0-NEXT: std Y+2, r25
+; CHECK-O0-NEXT: ldd r24, Y+1
+; CHECK-O0-NEXT: ldd r25, Y+2
+; CHECK-O0-NEXT: lsl r24
+; CHECK-O0-NEXT: rol r25
+; CHECK-O0-NEXT: subi r24, -lo8(arr0)
+; CHECK-O0-NEXT: sbci r25, -hi8(arr0)
+; CHECK-O0-NEXT: movw r30, r24
+; CHECK-O0-NEXT: lpm r24, Z+
+; CHECK-O0-NEXT: lpm r25, Z
+; CHECK-O0-NEXT: adiw r28, 2
+; CHECK-O0-NEXT: in r0, 63
+; CHECK-O0-NEXT: cli
+; CHECK-O0-NEXT: out 62, r29
+; CHECK-O0-NEXT: out 63, r0
+; CHECK-O0-NEXT: out 61, r28
+; CHECK-O0-NEXT: pop r29
+; CHECK-O0-NEXT: pop r28
+; CHECK-O0-NEXT: ret
+;
+; CHECK-O3-LABEL: foo0:
+; CHECK-O3: ; %bb.0: ; %entry
+; CHECK-O3-NEXT: push r28
+; CHECK-O3-NEXT: push r29
+; CHECK-O3-NEXT: in r28, 61
+; CHECK-O3-NEXT: in r29, 62
+; CHECK-O3-NEXT: sbiw r28, 2
+; CHECK-O3-NEXT: in r0, 63
+; CHECK-O3-NEXT: cli
+; CHECK-O3-NEXT: out 62, r29
+; CHECK-O3-NEXT: out 63, r0
+; CHECK-O3-NEXT: out 61, r28
+; CHECK-O3-NEXT: std Y+1, r24
+; CHECK-O3-NEXT: std Y+2, r25
+; CHECK-O3-NEXT: lsl r24
+; CHECK-O3-NEXT: rol r25
+; CHECK-O3-NEXT: subi r24, -lo8(arr0)
+; CHECK-O3-NEXT: sbci r25, -hi8(arr0)
+; CHECK-O3-NEXT: movw r30, r24
+; CHECK-O3-NEXT: lpm r24, Z+
+; CHECK-O3-NEXT: lpm r25, Z
+; CHECK-O3-NEXT: adiw r28, 2
+; CHECK-O3-NEXT: in r0, 63
+; CHECK-O3-NEXT: cli
+; CHECK-O3-NEXT: out 62, r29
+; CHECK-O3-NEXT: out 63, r0
+; CHECK-O3-NEXT: out 61, r28
+; CHECK-O3-NEXT: pop r29
+; CHECK-O3-NEXT: pop r28
+; CHECK-O3-NEXT: ret
+entry:
+ %a.addr = alloca i16, align 1
+ store i16 %a, i16* %a.addr, align 1
+ %0 = load i16, i16* %a.addr, align 1
+ %arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %0
+ %1 = load i16, i16 addrspace(1)* %arrayidx, align 1
+ ret i16 %1
+}
+
+; 8-bit load from addrspace(1) on atmega328, with the index spilled through
+; an alloca: the load is expected to become a single "lpm r24, Z" under both
+; the -O0 and the -O3 RUN lines.
+define i8 @foo1(i16 %a) addrspace(1) {
+; CHECK-O0-LABEL: foo1:
+; CHECK-O0: ; %bb.0: ; %entry
+; CHECK-O0-NEXT: push r28
+; CHECK-O0-NEXT: push r29
+; CHECK-O0-NEXT: in r28, 61
+; CHECK-O0-NEXT: in r29, 62
+; CHECK-O0-NEXT: sbiw r28, 2
+; CHECK-O0-NEXT: in r0, 63
+; CHECK-O0-NEXT: cli
+; CHECK-O0-NEXT: out 62, r29
+; CHECK-O0-NEXT: out 63, r0
+; CHECK-O0-NEXT: out 61, r28
+; CHECK-O0-NEXT: std Y+1, r24
+; CHECK-O0-NEXT: std Y+2, r25
+; CHECK-O0-NEXT: ldd r24, Y+1
+; CHECK-O0-NEXT: ldd r25, Y+2
+; CHECK-O0-NEXT: subi r24, -lo8(arr1)
+; CHECK-O0-NEXT: sbci r25, -hi8(arr1)
+; CHECK-O0-NEXT: movw r30, r24
+; CHECK-O0-NEXT: lpm r24, Z
+; CHECK-O0-NEXT: adiw r28, 2
+; CHECK-O0-NEXT: in r0, 63
+; CHECK-O0-NEXT: cli
+; CHECK-O0-NEXT: out 62, r29
+; CHECK-O0-NEXT: out 63, r0
+; CHECK-O0-NEXT: out 61, r28
+; CHECK-O0-NEXT: pop r29
+; CHECK-O0-NEXT: pop r28
+; CHECK-O0-NEXT: ret
+;
+; CHECK-O3-LABEL: foo1:
+; CHECK-O3: ; %bb.0: ; %entry
+; CHECK-O3-NEXT: push r28
+; CHECK-O3-NEXT: push r29
+; CHECK-O3-NEXT: in r28, 61
+; CHECK-O3-NEXT: in r29, 62
+; CHECK-O3-NEXT: sbiw r28, 2
+; CHECK-O3-NEXT: in r0, 63
+; CHECK-O3-NEXT: cli
+; CHECK-O3-NEXT: out 62, r29
+; CHECK-O3-NEXT: out 63, r0
+; CHECK-O3-NEXT: out 61, r28
+; CHECK-O3-NEXT: std Y+1, r24
+; CHECK-O3-NEXT: std Y+2, r25
+; CHECK-O3-NEXT: subi r24, -lo8(arr1)
+; CHECK-O3-NEXT: sbci r25, -hi8(arr1)
+; CHECK-O3-NEXT: movw r30, r24
+; CHECK-O3-NEXT: lpm r24, Z
+; CHECK-O3-NEXT: adiw r28, 2
+; CHECK-O3-NEXT: in r0, 63
+; CHECK-O3-NEXT: cli
+; CHECK-O3-NEXT: out 62, r29
+; CHECK-O3-NEXT: out 63, r0
+; CHECK-O3-NEXT: out 61, r28
+; CHECK-O3-NEXT: pop r29
+; CHECK-O3-NEXT: pop r28
+; CHECK-O3-NEXT: ret
+entry:
+ %a.addr = alloca i16, align 1
+ store i16 %a, i16* %a.addr, align 1
+ %0 = load i16, i16* %a.addr, align 1
+ %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arr1, i16 0, i16 %0
+ %1 = load i8, i8 addrspace(1)* %arrayidx, align 1
+ ret i8 %1
+}