MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
static unsigned ShrinkCounter = 0;
+ if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() &&
+ MF.getFunction().isVarArg())
+ return;
if (ShrinkLimit.getPosition()) {
if (ShrinkCounter >= ShrinkLimit)
return;
DebugLoc dl = MBB.findDebugLoc(InsertPt);
+ if (MF.getFunction().isVarArg() &&
+ MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {
+ // Calculate the size of register saved area.
+ int NumVarArgRegs = 6 - FirstVarArgSavedReg;
+ int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0)
+ ? NumVarArgRegs * 4
+ : NumVarArgRegs * 4 + 4;
+ if (RegisterSavedAreaSizePlusPadding > 0) {
+ // Decrement the stack pointer by size of register saved area plus
+ // padding if any.
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+ .addReg(SP)
+ .addImm(-RegisterSavedAreaSizePlusPadding)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ int NumBytes = 0;
+ // Copy all the named arguments below register saved area.
+ auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ for (int i = HMFI.getFirstNamedArgFrameIndex(),
+ e = HMFI.getLastNamedArgFrameIndex(); i >= e; --i) {
+ int ObjSize = MFI.getObjectSize(i);
+ int ObjAlign = MFI.getObjectAlignment(i);
+
+ // Determine the kind of load/store that should be used.
+ unsigned LDOpc, STOpc;
+ int OpcodeChecker = ObjAlign;
+
+ // Handle cases where alignment of an object is > its size.
+ if (ObjSize < ObjAlign) {
+ if (ObjSize <= 1)
+ OpcodeChecker = 1;
+ else if (ObjSize <= 2)
+ OpcodeChecker = 2;
+ else if (ObjSize <= 4)
+ OpcodeChecker = 4;
+ else if (ObjSize > 4)
+ OpcodeChecker = 8;
+ }
+
+ switch (OpcodeChecker) {
+ case 1:
+ LDOpc = Hexagon::L2_loadrb_io;
+ STOpc = Hexagon::S2_storerb_io;
+ break;
+ case 2:
+ LDOpc = Hexagon::L2_loadrh_io;
+ STOpc = Hexagon::S2_storerh_io;
+ break;
+ case 4:
+ LDOpc = Hexagon::L2_loadri_io;
+ STOpc = Hexagon::S2_storeri_io;
+ break;
+ case 8:
+ default:
+ LDOpc = Hexagon::L2_loadrd_io;
+ STOpc = Hexagon::S2_storerd_io;
+ break;
+ }
+
+ unsigned RegUsed = LDOpc == Hexagon::L2_loadrd_io ? Hexagon::D3
+ : Hexagon::R6;
+ int LoadStoreCount = ObjSize / OpcodeChecker;
+
+ if (ObjSize % OpcodeChecker)
+ ++LoadStoreCount;
+
+ // Get the start location of the load. NumBytes is basically the
+ // offset from the stack pointer of previous function, which would be
+ // the caller in this case, as this function has variable argument
+ // list.
+ if (NumBytes != 0)
+ NumBytes = alignTo(NumBytes, ObjAlign);
+
+ int Count = 0;
+ while (Count < LoadStoreCount) {
+ // Load the value of the named argument on stack.
+ BuildMI(MBB, InsertPt, dl, HII.get(LDOpc), RegUsed)
+ .addReg(SP)
+ .addImm(RegisterSavedAreaSizePlusPadding +
+ ObjAlign * Count + NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Store it below the register saved area plus padding.
+ BuildMI(MBB, InsertPt, dl, HII.get(STOpc))
+ .addReg(SP)
+ .addImm(ObjAlign * Count + NumBytes)
+ .addReg(RegUsed)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ Count++;
+ }
+ NumBytes += MFI.getObjectSize(i);
+ }
+
+ // Make NumBytes 8 byte aligned
+ NumBytes = alignTo(NumBytes, 8);
+
+ // If the number of registers having variable arguments is odd,
+ // leave 4 bytes of padding to get to the location where first
+ // variable argument which was passed through register was copied.
+ NumBytes = (NumVarArgRegs % 2 == 0) ? NumBytes : NumBytes + 4;
+
+ for (int j = FirstVarArgSavedReg, i = 0; j < 6; ++j, ++i) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_storeri_io))
+ .addReg(SP)
+ .addImm(NumBytes + 4 * i)
+ .addReg(Hexagon::R0 + j)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+ }
+
if (hasFP(MF)) {
insertAllocframe(MBB, InsertPt, NumBytes);
if (AlignStack) {
if (!hasFP(MF)) {
MachineFrameInfo &MFI = MF.getFrameInfo();
- if (unsigned NumBytes = MFI.getStackSize()) {
+ unsigned NumBytes = MFI.getStackSize();
+ if (MF.getFunction().isVarArg() &&
+ MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {
+ // On Hexagon Linux, deallocate the stack for the register saved area.
+ int NumVarArgRegs = 6 - FirstVarArgSavedReg;
+ int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?
+ (NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);
+ NumBytes += RegisterSavedAreaSizePlusPadding;
+ }
+ if (NumBytes) {
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
.addReg(SP)
.addImm(NumBytes);
NeedsDeallocframe = false;
}
- if (!NeedsDeallocframe)
- return;
- // If the returning instruction is PS_jmpret, replace it with dealloc_return,
- // otherwise just add deallocframe. The function could be returning via a
- // tail call.
- if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
+ if (!MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() ||
+ !MF.getFunction().isVarArg()) {
+ if (!NeedsDeallocframe)
+ return;
+ // If the returning instruction is PS_jmpret, replace it with
+ // dealloc_return, otherwise just add deallocframe. The function
+ // could be returning via a tail call.
+ if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
.addDef(Hexagon::D15)
.addReg(Hexagon::R30);
- return;
- }
- unsigned NewOpc = Hexagon::L4_return;
- MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
+ return;
+ }
+ unsigned NewOpc = Hexagon::L4_return;
+ MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
.addDef(Hexagon::D15)
.addReg(Hexagon::R30);
- // Transfer the function live-out registers.
- NewI->copyImplicitOps(MF, *RetI);
- MBB.erase(RetI);
+ // Transfer the function live-out registers.
+ NewI->copyImplicitOps(MF, *RetI);
+ MBB.erase(RetI);
+ } else {
+ // For musl varargs, the epilogue must also pop the register saved area:
+ // emit L2_deallocframe (unless a restore-tail-call pseudo already handles
+ // the frame) and then add the saved-area size back to SP after the
+ // L2_deallocframe instruction.
+ // Calculate the size of register saved area.
+ int NumVarArgRegs = 6 - FirstVarArgSavedReg;
+ int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?
+ (NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);
+
+ MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
+ MachineBasicBlock::iterator I = (Term == MBB.begin()) ? MBB.end()
+ : std::prev(Term);
+ if (I == MBB.end() ||
+ (I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT &&
+ I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC &&
+ I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 &&
+ I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC))
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
+ .addDef(Hexagon::D15)
+ .addReg(Hexagon::R30);
+ if (RegisterSavedAreaSizePlusPadding != 0)
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+ .addReg(SP)
+ .addImm(RegisterSavedAreaSizePlusPadding);
+ }
}
void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
/// checks are performed, which may still lead to the inline code.
bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
const CSIVect &CSI) const {
+ if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl())
+ return true;
if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
return true;
if (!hasFP(MF))
class HexagonFrameLowering : public TargetFrameLowering {
public:
+ // First register which could possibly hold a variable argument.
+ int FirstVarArgSavedReg;
explicit HexagonFrameLowering()
: TargetFrameLowering(StackGrowsDown, Align(8), 0, Align::None(), true) {}
if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
+ // Linux ABI treats var-arg calls the same way as regular ones.
+ bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
+
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
+ HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
NumParams);
if (Subtarget.useHVXOps())
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Linux ABI treats var-arg calls the same way as regular ones.
+ bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
+
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
+ HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
+ *DAG.getContext(),
MF.getFunction().getFunctionType()->getNumParams());
if (Subtarget.useHVXOps())
// caller's stack is passed only when the struct size is smaller than (and
// equal to) 8 bytes. If not, no address will be passed into callee and
// callee return the result direclty through R0/R1.
+ auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
+ switch (RC.getID()) {
+ case Hexagon::IntRegsRegClassID:
+ return Reg - Hexagon::R0 + 1;
+ case Hexagon::DoubleRegsRegClassID:
+ return (Reg - Hexagon::D0 + 1) * 2;
+ case Hexagon::HvxVRRegClassID:
+ return Reg - Hexagon::V0 + 1;
+ case Hexagon::HvxWRRegClassID:
+ return (Reg - Hexagon::W0 + 1) * 2;
+ }
+ llvm_unreachable("Unexpected register class");
+ };
+ auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ HFL.FirstVarArgSavedReg = 0;
+ HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
}
InVals.push_back(Copy);
MRI.addLiveIn(VA.getLocReg(), VReg);
+ HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
} else {
assert(VA.isMemLoc() && "Argument should be passed in memory");
}
}
+ if (IsVarArg && Subtarget.isEnvironmentMusl()) {
+ for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
+ MRI.addLiveIn(Hexagon::R0+i);
+ }
+
+ if (IsVarArg && Subtarget.isEnvironmentMusl()) {
+ HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
+ HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
+
+ // Create Frame index for the start of register saved area.
+ int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
+ bool RequiresPadding = (NumVarArgRegs & 1);
+ int RegSaveAreaSizePlusPadding = RequiresPadding
+ ? (NumVarArgRegs + 1) * 4
+ : NumVarArgRegs * 4;
+
+ if (RegSaveAreaSizePlusPadding > 0) {
+ // The offset to saved register area should be 8 byte aligned.
+ // NOTE(review): the guard below fires only when RegAreaStart is already
+ // a multiple of 8, making the round-up a no-op; confirm whether the
+ // condition should be (RegAreaStart % 8) instead.
+ int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+ if (!(RegAreaStart % 8))
+ RegAreaStart = (RegAreaStart + 7) & -8;
- if (IsVarArg) {
+ int RegSaveAreaFrameIndex =
+ MFI.CreateFixedObject(RegSaveAreaSizePlusPadding, RegAreaStart, true);
+ HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);
+
+ // This will point to the next argument passed via stack.
+ int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
+ int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
+ HMFI.setVarArgsFrameIndex(FI);
+ } else {
+ // This will point to the next argument passed via stack, when
+ // there is no saved register area.
+ int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+ int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
+ HMFI.setRegSavedAreaStartFrameIndex(FI);
+ HMFI.setVarArgsFrameIndex(FI);
+ }
+ }
+
+
+ if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
// This will point to the next argument passed via stack.
int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
- MachinePointerInfo(SV));
+
+ if (!Subtarget.isEnvironmentMusl()) {
+ return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
+ MachinePointerInfo(SV));
+ }
+ auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
+ auto &HFL = *Subtarget.getFrameLowering();
+ SDLoc DL(Op);
+ SmallVector<SDValue, 8> MemOps;
+
+ // Get frame index of va_list.
+ SDValue FIN = Op.getOperand(1);
+
+ // If first Vararg register is odd, add 4 bytes to start of
+ // saved register area to point to the first register location.
+ // This is because the saved register area has to be 8 byte aligned.
+ // Incase of an odd start register, there will be 4 bytes of padding in
+ // the beginning of saved register area. If all registers area used up,
+ // the following condition will handle it correctly.
+ SDValue SavedRegAreaStartFrameIndex =
+ DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);
+
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+ if (HFL.FirstVarArgSavedReg & 1)
+ SavedRegAreaStartFrameIndex =
+ DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
+ MVT::i32),
+ DAG.getIntPtrConstant(4, DL));
+
+ // Store the saved register area start pointer.
+ SDValue Store =
+ DAG.getStore(Op.getOperand(0), DL,
+ SavedRegAreaStartFrameIndex,
+ FIN, MachinePointerInfo(SV));
+ MemOps.push_back(Store);
+
+ // Store saved register area end pointer.
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
+ FIN, DAG.getIntPtrConstant(4, DL));
+ Store = DAG.getStore(Op.getOperand(0), DL,
+ DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
+ PtrVT),
+ FIN, MachinePointerInfo(SV, 4));
+ MemOps.push_back(Store);
+
+ // Store overflow area pointer.
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
+ FIN, DAG.getIntPtrConstant(4, DL));
+ Store = DAG.getStore(Op.getOperand(0), DL,
+ DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
+ PtrVT),
+ FIN, MachinePointerInfo(SV, 8));
+ MemOps.push_back(Store);
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
+}
+
+SDValue
+HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+  // Lower ISD::VACOPY for the Hexagon/musl ABI. Here va_list is a structure
+  // of three pointers (see LowerVASTART), not a single pointer, so copying
+  // it requires copying the whole structure rather than one pointer store.
+  // Assert that the linux ABI is enabled for the current compilation.
+  assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
+  SDValue Chain = Op.getOperand(0);
+  SDValue DestPtr = Op.getOperand(1);
+  SDValue SrcPtr = Op.getOperand(2);
+  // Operands 3/4 carry the IR-level pointer values; they only annotate the
+  // memory operands of the generated memcpy.
+  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+  SDLoc DL(Op);
+  // Size of the va_list is 12 bytes as it has 3 pointers. Therefore,
+  // we need to memcopy 12 bytes from va_list to another similar list.
+  return DAG.getMemcpy(Chain, DL, DestPtr, SrcPtr,
+                       DAG.getIntPtrConstant(12, DL), 4, /*isVolatile*/false,
+                       false, false,
+                       MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
+
}
SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
- setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ if (Subtarget.isEnvironmentMusl())
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
}
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
unsigned StackAlignBaseVReg = 0; // Aligned-stack base register (virtual)
unsigned StackAlignBasePhysReg = 0; // (physical)
int VarArgsFrameIndex;
+ int RegSavedAreaStartFrameIndex;
+ int FirstNamedArgFrameIndex;
+ int LastNamedArgFrameIndex;
bool HasClobberLR = false;
bool HasEHReturn = false;
std::map<const MachineInstr*, unsigned> PacketInfo;
void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
+ void setRegSavedAreaStartFrameIndex(int v) { RegSavedAreaStartFrameIndex = v;}
+ int getRegSavedAreaStartFrameIndex() { return RegSavedAreaStartFrameIndex; }
+
+ void setFirstNamedArgFrameIndex(int v) { FirstNamedArgFrameIndex = v; }
+ int getFirstNamedArgFrameIndex() { return FirstNamedArgFrameIndex; }
+
+ void setLastNamedArgFrameIndex(int v) { LastNamedArgFrameIndex = v; }
+ int getLastNamedArgFrameIndex() { return LastNamedArgFrameIndex; }
+
void setStartPacket(MachineInstr* MI) {
PacketInfo[MI] |= Hexagon::StartPacket;
}
HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
StringRef FS, const TargetMachine &TM)
: HexagonGenSubtargetInfo(TT, CPU, FS), OptLevel(TM.getOptLevel()),
- CPUString(Hexagon_MC::selectHexagonCPU(CPU)),
+ CPUString(Hexagon_MC::selectHexagonCPU(CPU)), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(CPU, FS)),
RegInfo(getHwMode()), TLInfo(TM, *this),
InstrItins(getInstrItineraryForCPU(CPUString)) {
private:
std::string CPUString;
+ Triple TargetTriple;
HexagonInstrInfo InstrInfo;
HexagonRegisterInfo RegInfo;
HexagonTargetLowering TLInfo;
HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
const TargetMachine &TM);
+ const Triple &getTargetTriple() const { return TargetTriple; }
+ bool isEnvironmentMusl() const {
+ return TargetTriple.getEnvironment() == Triple::Musl;
+ }
+
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData *getInstrItineraryData() const override {
--- /dev/null
+; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s
+; CHECK-LABEL: PrintInts:
+; The memw stores come from spilling the unnamed argument registers in the
+; prologue; the memd load/store pair presumably comes from va_copy copying
+; the 12-byte va_list structure -- confirm against the VACOPY lowering.
+; CHECK-DAG: memw{{.*}} = r{{[0-9]+}}
+; CHECK-DAG: memw{{.*}} = r{{[0-9]+}}
+; CHECK-DAG: r{{[0-9]+}}:{{[0-9]+}} = memd{{.*}}
+; CHECK-DAG: memd{{.*}} = r{{[0-9]+}}:{{[0-9]+}}
+
+; Musl va_list layout: { current saved-reg pointer, saved-reg area end
+; pointer, overflow-area pointer }.
+%struct.__va_list_tag = type { i8*, i8*, i8* }
+
+; Function Attrs: nounwind
+; Starts a va_list and duplicates it with va_copy; no varargs are read.
+define void @PrintInts(i32 %first, ...) #0 {
+entry:
+  %vl = alloca [1 x %struct.__va_list_tag], align 8
+  %vl_count = alloca [1 x %struct.__va_list_tag], align 8
+  %arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %vl to i8*
+  call void @llvm.va_start(i8* %arraydecay1)
+  %0 = bitcast [1 x %struct.__va_list_tag]* %vl_count to i8*
+  call void @llvm.va_copy(i8* %0, i8* %arraydecay1)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_copy(i8*, i8*) #1
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  tail call void (i32, ...) @PrintInts(i32 undef, i32 20, i32 30, i32 40, i32 50, i32 0)
+  ret i32 0
+}
+
+attributes #0 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}
--- /dev/null
+; RUN: llc -march=hexagon -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s
+
+; Test that the compiler deallocates the register saved area on Linux
+; for functions that do not need a frame pointer.
+; With no named arguments, all six argument registers (r0-r5) are saved,
+; so SIZE is expected to be 6 * 4 = 24 bytes (even register count, hence
+; no extra padding) -- the CHECKs only require alloc/dealloc to match.
+
+; CHECK: r29 = add(r29,#-[[SIZE:[0-9]+]]
+; CHECK: r29 = add(r29,#[[SIZE]])
+
+; Empty varargs body: only the prologue/epilogue SP adjustments matter.
+define void @test(...) {
+entry:
+  ret void
+}
+
--- /dev/null
+; RUN: llc -march=hexagon -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s
+
+; Check that we update the stack pointer before we do allocframe, so that
+; the LR/FP are stored in the location required by the Linux ABI.
+; CHECK: r29 = add(r29,#-24)
+; CHECK: allocframe
+
+target triple = "hexagon-unknown-linux"
+
+; Musl va_list layout: { current saved-reg pointer, saved-reg area end
+; pointer, overflow-area pointer }.
+%s.0 = type { i8*, i8*, i8* }
+
+; f0 adds the incoming i32 varargs to %a0 until a 0 value is read from the
+; argument list; va_arg is expanded inline below (register-area fast path
+; plus overflow-area slow path).
+define dso_local i32 @f0(i32 %a0, ...) local_unnamed_addr #0 {
+b0:
+  %v0 = alloca [1 x %s.0], align 8
+  %v1 = bitcast [1 x %s.0]* %v0 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %v1) #2
+  call void @llvm.va_start(i8* nonnull %v1)
+  %v2 = getelementptr inbounds [1 x %s.0], [1 x %s.0]* %v0, i32 0, i32 0, i32 0
+  %v3 = load i8*, i8** %v2, align 8
+  %v4 = getelementptr inbounds [1 x %s.0], [1 x %s.0]* %v0, i32 0, i32 0, i32 1
+  %v5 = load i8*, i8** %v4, align 4
+  %v6 = getelementptr i8, i8* %v3, i32 4
+  %v7 = icmp sgt i8* %v6, %v5
+  br i1 %v7, label %b1, label %b2
+
+; Saved-register area exhausted: read the argument from the overflow area.
+b1:                                               ; preds = %b0
+  %v8 = getelementptr inbounds [1 x %s.0], [1 x %s.0]* %v0, i32 0, i32 0, i32 2
+  %v9 = load i8*, i8** %v8, align 8
+  %v10 = getelementptr i8, i8* %v9, i32 4
+  store i8* %v10, i8** %v8, align 8
+  br label %b2
+
+b2:                                               ; preds = %b1, %b0
+  %v11 = phi i8* [ %v10, %b1 ], [ %v6, %b0 ]
+  %v12 = phi i8* [ %v9, %b1 ], [ %v3, %b0 ]
+  %v13 = bitcast i8* %v12 to i32*
+  store i8* %v11, i8** %v2, align 8
+  %v14 = load i32, i32* %v13, align 4
+  %v15 = icmp eq i32 %v14, 0
+  br i1 %v15, label %b7, label %b3
+
+b3:                                               ; preds = %b2
+  %v16 = getelementptr inbounds [1 x %s.0], [1 x %s.0]* %v0, i32 0, i32 0, i32 2
+  br label %b4
+
+; Accumulation loop: %v20 is the running sum, %v17 the value read in the
+; previous iteration.
+b4:                                               ; preds = %b6, %b3
+  %v17 = phi i32 [ %v14, %b3 ], [ %v28, %b6 ]
+  %v18 = phi i32 [ %a0, %b3 ], [ %v20, %b6 ]
+  %v19 = phi i8* [ %v11, %b3 ], [ %v25, %b6 ]
+  %v20 = add nsw i32 %v17, %v18
+  %v21 = getelementptr i8, i8* %v19, i32 4
+  %v22 = icmp sgt i8* %v21, %v5
+  br i1 %v22, label %b5, label %b6
+
+b5:                                               ; preds = %b4
+  %v23 = load i8*, i8** %v16, align 8
+  %v24 = getelementptr i8, i8* %v23, i32 4
+  store i8* %v24, i8** %v16, align 8
+  br label %b6
+
+b6:                                               ; preds = %b5, %b4
+  %v25 = phi i8* [ %v24, %b5 ], [ %v21, %b4 ]
+  %v26 = phi i8* [ %v23, %b5 ], [ %v19, %b4 ]
+  %v27 = bitcast i8* %v26 to i32*
+  store i8* %v25, i8** %v2, align 8
+  %v28 = load i32, i32* %v27, align 4
+  %v29 = icmp eq i32 %v28, 0
+  br i1 %v29, label %b7, label %b4
+
+b7:                                               ; preds = %b6, %b2
+  %v30 = phi i32 [ %a0, %b2 ], [ %v20, %b6 ]
+  call void @llvm.va_end(i8* nonnull %v1)
+  call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %v1) #2
+  ret i32 %v30
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+attributes #0 = { argmemonly nounwind "frame-pointer"="all" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
--- /dev/null
+; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl -O0 < %s | FileCheck %s
+
+; CHECK-LABEL: foo:
+
+; Check function prologue generation
+; CHECK: r29 = add(r29,#-24)
+; CHECK: memw(r29+#4) = r1
+; CHECK: memw(r29+#8) = r2
+; CHECK: memw(r29+#12) = r3
+; CHECK: memw(r29+#16) = r4
+; CHECK: memw(r29+#20) = r5
+; CHECK: r29 = add(r29,#24)
+
+
+%struct.AAA = type { i32, i32, i32, i32 }
+%struct.__va_list_tag = type { i8*, i8*, i8* }
+
+@aaa = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4
+@.str = private unnamed_addr constant [13 x i8] c"result = %d\0A\00", align 1
+
+; Function Attrs: nounwind
+define i32 @foo(i32 %xx, ...) #0 {
+entry:
+ %ap = alloca [1 x %struct.__va_list_tag], align 8
+ %arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*
+ call void @llvm.va_start(i8* %arraydecay1)
+ %__current_saved_reg_area_pointer_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 0
+ %__current_saved_reg_area_pointer = load i8*, i8** %__current_saved_reg_area_pointer_p, align 8
+ %__saved_reg_area_end_pointer_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 1
+ %__saved_reg_area_end_pointer = load i8*, i8** %__saved_reg_area_end_pointer_p, align 4
+ %__new_saved_reg_area_pointer = getelementptr i8, i8* %__current_saved_reg_area_pointer, i32 4
+ %0 = icmp sgt i8* %__new_saved_reg_area_pointer, %__saved_reg_area_end_pointer
+ %__overflow_area_pointer_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 2
+ %__overflow_area_pointer = load i8*, i8** %__overflow_area_pointer_p, align 8
+ br i1 %0, label %vaarg.on_stack, label %vaarg.end
+
+vaarg.on_stack: ; preds = %entry
+ %__overflow_area_pointer.next = getelementptr i8, i8* %__overflow_area_pointer, i32 4
+ store i8* %__overflow_area_pointer.next, i8** %__overflow_area_pointer_p, align 8
+ br label %vaarg.end
+
+vaarg.end: ; preds = %entry, %vaarg.on_stack
+ %__overflow_area_pointer5 = phi i8* [ %__overflow_area_pointer.next, %vaarg.on_stack ], [ %__overflow_area_pointer, %entry ]
+ %storemerge32 = phi i8* [ %__overflow_area_pointer.next, %vaarg.on_stack ], [ %__new_saved_reg_area_pointer, %entry ]
+ %vaarg.addr.in = phi i8* [ %__overflow_area_pointer, %vaarg.on_stack ], [ %__current_saved_reg_area_pointer, %entry ]
+ store i8* %storemerge32, i8** %__current_saved_reg_area_pointer_p, align 8
+ %vaarg.addr = bitcast i8* %vaarg.addr.in to i32*
+ %1 = load i32, i32* %vaarg.addr, align 4
+ %__overflow_area_pointer_p4 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 2
+ %__overflow_area_pointer.next6 = getelementptr i8, i8* %__overflow_area_pointer5, i32 16
+ store i8* %__overflow_area_pointer.next6, i8** %__overflow_area_pointer_p4, align 8
+ %bbb.sroa.1.0.idx27 = getelementptr inbounds i8, i8* %__overflow_area_pointer5, i32 12
+ %2 = bitcast i8* %bbb.sroa.1.0.idx27 to i32*
+ %bbb.sroa.1.0.copyload = load i32, i32* %2, align 4
+ %add8 = add nsw i32 %bbb.sroa.1.0.copyload, %1
+ %__new_saved_reg_area_pointer15 = getelementptr i8, i8* %storemerge32, i32 4
+ %3 = icmp sgt i8* %__new_saved_reg_area_pointer15, %__saved_reg_area_end_pointer
+ br i1 %3, label %vaarg.on_stack17, label %vaarg.end21
+
+vaarg.on_stack17: ; preds = %vaarg.end
+ %__overflow_area_pointer.next20 = getelementptr i8, i8* %__overflow_area_pointer5, i32 20
+ store i8* %__overflow_area_pointer.next20, i8** %__overflow_area_pointer_p4, align 8
+ br label %vaarg.end21
+
+vaarg.end21: ; preds = %vaarg.end, %vaarg.on_stack17
+ %storemerge = phi i8* [ %__overflow_area_pointer.next20, %vaarg.on_stack17 ], [ %__new_saved_reg_area_pointer15, %vaarg.end ]
+ %vaarg.addr22.in = phi i8* [ %__overflow_area_pointer.next6, %vaarg.on_stack17 ], [ %storemerge32, %vaarg.end ]
+ store i8* %storemerge, i8** %__current_saved_reg_area_pointer_p, align 8
+ %vaarg.addr22 = bitcast i8* %vaarg.addr22.in to i32*
+ %4 = load i32, i32* %vaarg.addr22, align 4
+ %add23 = add nsw i32 %add8, %4
+ call void @llvm.va_end(i8* %arraydecay1)
+ ret i32 %add23
+}
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #1
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+ %call = tail call i32 (i32, ...) @foo(i32 undef, i32 2, %struct.AAA* byval align 4 @aaa, i32 4)
+ %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %call) #1
+ ret i32 %call
+}
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) #0
+
+attributes #0 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}
--- /dev/null
+; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl -O0 < %s | FileCheck %s
+
+; CHECK-LABEL: foo:
+
+; Check Function prologue.
+; Note: all register numbers and offsets are fixed.
+; Hence, no need of regular expression.
+
+; CHECK: r29 = add(r29,#-24)
+; CHECK: r7:6 = memd(r29+#24)
+; CHECK: memd(r29+#0) = r7:6
+; CHECK: r7:6 = memd(r29+#32)
+; CHECK: memd(r29+#8) = r7:6
+; CHECK: r7:6 = memd(r29+#40)
+; CHECK: memd(r29+#16) = r7:6
+; CHECK: memw(r29+#28) = r1
+; CHECK: memw(r29+#32) = r2
+; CHECK: memw(r29+#36) = r3
+; CHECK: memw(r29+#40) = r4
+; CHECK: memw(r29+#44) = r5
+; CHECK: r29 = add(r29,#24)
+
+%struct.AAA = type { i32, i32, i32, i32 }
+%struct.BBB = type { i8, i64, i32 }
+%struct.__va_list_tag = type { i8*, i8*, i8* }
+
+@aaa = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4
+@ddd = global { i8, i64, i32, [4 x i8] } { i8 1, i64 1000000, i32 5, [4 x i8] undef }, align 8
+@.str = private unnamed_addr constant [13 x i8] c"result = %d\0A\00", align 1
+
+; Function Attrs: nounwind
+define i32 @foo(i32 %xx, %struct.BBB* byval align 8 %eee, ...) #0 {
+entry:
+ %xx.addr = alloca i32, align 4
+ %ap = alloca [1 x %struct.__va_list_tag], align 8
+ %d = alloca i32, align 4
+ %k = alloca i64, align 8
+ %ret = alloca i32, align 4
+ %bbb = alloca %struct.AAA, align 4
+ store i32 %xx, i32* %xx.addr, align 4
+ store i32 0, i32* %ret, align 4
+ %x = getelementptr inbounds %struct.BBB, %struct.BBB* %eee, i32 0, i32 0
+ %0 = load i8, i8* %x, align 1
+ %tobool = trunc i8 %0 to i1
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 1, i32* %ret, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ %arraydecay1 = bitcast %struct.__va_list_tag* %arraydecay to i8*
+ call void @llvm.va_start(i8* %arraydecay1)
+ %arraydecay2 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ br label %vaarg.maybe_reg
+
+vaarg.maybe_reg: ; preds = %if.end
+ %__current_saved_reg_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 0
+ %__current_saved_reg_area_pointer = load i8*, i8** %__current_saved_reg_area_pointer_p
+ %__saved_reg_area_end_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 1
+ %__saved_reg_area_end_pointer = load i8*, i8** %__saved_reg_area_end_pointer_p
+ %1 = ptrtoint i8* %__current_saved_reg_area_pointer to i32
+ %align_current_saved_reg_area_pointer = add i32 %1, 7
+ %align_current_saved_reg_area_pointer3 = and i32 %align_current_saved_reg_area_pointer, -8
+ %align_current_saved_reg_area_pointer4 = inttoptr i32 %align_current_saved_reg_area_pointer3 to i8*
+ %__new_saved_reg_area_pointer = getelementptr i8, i8* %align_current_saved_reg_area_pointer4, i32 8
+ %2 = icmp sgt i8* %__new_saved_reg_area_pointer, %__saved_reg_area_end_pointer
+ br i1 %2, label %vaarg.on_stack, label %vaarg.in_reg
+
+vaarg.in_reg: ; preds = %vaarg.maybe_reg
+ %3 = bitcast i8* %align_current_saved_reg_area_pointer4 to i64*
+ store i8* %__new_saved_reg_area_pointer, i8** %__current_saved_reg_area_pointer_p
+ br label %vaarg.end
+
+vaarg.on_stack: ; preds = %vaarg.maybe_reg
+ %__overflow_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 2
+ %__overflow_area_pointer = load i8*, i8** %__overflow_area_pointer_p
+ %4 = ptrtoint i8* %__overflow_area_pointer to i32
+ %align_overflow_area_pointer = add i32 %4, 7
+ %align_overflow_area_pointer5 = and i32 %align_overflow_area_pointer, -8
+ %align_overflow_area_pointer6 = inttoptr i32 %align_overflow_area_pointer5 to i8*
+ %__overflow_area_pointer.next = getelementptr i8, i8* %align_overflow_area_pointer6, i32 8
+ store i8* %__overflow_area_pointer.next, i8** %__overflow_area_pointer_p
+ store i8* %__overflow_area_pointer.next, i8** %__current_saved_reg_area_pointer_p
+ %5 = bitcast i8* %align_overflow_area_pointer6 to i64*
+ br label %vaarg.end
+
+vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg
+ %vaarg.addr = phi i64* [ %3, %vaarg.in_reg ], [ %5, %vaarg.on_stack ]
+ %6 = load i64, i64* %vaarg.addr
+ store i64 %6, i64* %k, align 8
+ %7 = load i64, i64* %k, align 8
+ %conv = trunc i64 %7 to i32
+ %div = sdiv i32 %conv, 1000
+ %8 = load i32, i32* %ret, align 4
+ %add = add nsw i32 %8, %div
+ store i32 %add, i32* %ret, align 4
+ %arraydecay7 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ %__overflow_area_pointer_p8 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay7, i32 0, i32 2
+ %__overflow_area_pointer9 = load i8*, i8** %__overflow_area_pointer_p8
+ %9 = bitcast i8* %__overflow_area_pointer9 to %struct.AAA*
+ %__overflow_area_pointer.next10 = getelementptr i8, i8* %__overflow_area_pointer9, i32 16
+ store i8* %__overflow_area_pointer.next10, i8** %__overflow_area_pointer_p8
+ %10 = bitcast %struct.AAA* %bbb to i8*
+ %11 = bitcast %struct.AAA* %9 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %10, i8* %11, i32 16, i32 4, i1 false)
+ %d11 = getelementptr inbounds %struct.AAA, %struct.AAA* %bbb, i32 0, i32 3
+ %12 = load i32, i32* %d11, align 4
+ %13 = load i32, i32* %ret, align 4
+ %add12 = add nsw i32 %13, %12
+ store i32 %add12, i32* %ret, align 4
+ %arraydecay13 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ br label %vaarg.maybe_reg14
+
+vaarg.maybe_reg14: ; preds = %vaarg.end
+ %__current_saved_reg_area_pointer_p15 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 0
+ %__current_saved_reg_area_pointer16 = load i8*, i8** %__current_saved_reg_area_pointer_p15
+ %__saved_reg_area_end_pointer_p17 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 1
+ %__saved_reg_area_end_pointer18 = load i8*, i8** %__saved_reg_area_end_pointer_p17
+ %__new_saved_reg_area_pointer19 = getelementptr i8, i8* %__current_saved_reg_area_pointer16, i32 4
+ %14 = icmp sgt i8* %__new_saved_reg_area_pointer19, %__saved_reg_area_end_pointer18
+ br i1 %14, label %vaarg.on_stack21, label %vaarg.in_reg20
+
+vaarg.in_reg20: ; preds = %vaarg.maybe_reg14
+ %15 = bitcast i8* %__current_saved_reg_area_pointer16 to i32*
+ store i8* %__new_saved_reg_area_pointer19, i8** %__current_saved_reg_area_pointer_p15
+ br label %vaarg.end25
+
+vaarg.on_stack21: ; preds = %vaarg.maybe_reg14
+ %__overflow_area_pointer_p22 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 2
+ %__overflow_area_pointer23 = load i8*, i8** %__overflow_area_pointer_p22
+ %__overflow_area_pointer.next24 = getelementptr i8, i8* %__overflow_area_pointer23, i32 4
+ store i8* %__overflow_area_pointer.next24, i8** %__overflow_area_pointer_p22
+ store i8* %__overflow_area_pointer.next24, i8** %__current_saved_reg_area_pointer_p15
+ %16 = bitcast i8* %__overflow_area_pointer23 to i32*
+ br label %vaarg.end25
+
+vaarg.end25: ; preds = %vaarg.on_stack21, %vaarg.in_reg20
+ %vaarg.addr26 = phi i32* [ %15, %vaarg.in_reg20 ], [ %16, %vaarg.on_stack21 ]
+ %17 = load i32, i32* %vaarg.addr26
+ store i32 %17, i32* %d, align 4
+ %18 = load i32, i32* %d, align 4
+ %19 = load i32, i32* %ret, align 4
+ %add27 = add nsw i32 %19, %18
+ store i32 %add27, i32* %ret, align 4
+ %arraydecay28 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ %arraydecay2829 = bitcast %struct.__va_list_tag* %arraydecay28 to i8*
+ call void @llvm.va_end(i8* %arraydecay2829)
+ %20 = load i32, i32* %ret, align 4
+ ret i32 %20
+}
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #1
+
+; Function Attrs: nounwind
+; Driver for the first test: passes a 64-bit variadic argument plus a
+; by-value %struct.AAA and a trailing i32 to the variadic @foo, then
+; prints the accumulated result. The second fixed argument is a byval
+; %struct.BBB taken from the global @ddd (defined earlier in this file).
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %x = alloca i32, align 4
+ %m = alloca i64, align 8
+ store i32 0, i32* %retval
+ store i64 1000000, i64* %m, align 8
+ %0 = load i64, i64* %m, align 8
+ %call = call i32 (i32, %struct.BBB*, ...) @foo(i32 1, %struct.BBB* byval align 8 bitcast ({ i8, i64, i32, [4 x i8] }* @ddd to %struct.BBB*), i64 %0, %struct.AAA* byval align 4 @aaa, i32 4)
+ store i32 %call, i32* %x, align 4
+ %1 = load i32, i32* %x, align 4
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %1)
+ %2 = load i32, i32* %x, align 4
+ ret i32 %2
+}
+
+declare i32 @printf(i8*, ...) #2
+
+attributes #1 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}
--- /dev/null
+; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl -O0 < %s | FileCheck %s
+
+; CHECK-LABEL: foo:
+
+; Check Function prologue.
+; Note: all register numbers and offsets are fixed,
+; so there is no need for regular expressions.
+
+; CHECK: r29 = add(r29,#-8)
+; CHECK: memw(r29+#4) = r5
+; CHECK: r29 = add(r29,#8)
+
+%struct.AAA = type { i32, i32, i32, i32 }
+%struct.__va_list_tag = type { i8*, i8*, i8* }
+
+@aaa = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4
+@.str = private unnamed_addr constant [13 x i8] c"result = %d\0A\00", align 1
+
+; Function Attrs: nounwind
+; Variadic callee with five fixed i32 arguments. Pulls, in order: an i64,
+; a by-value %struct.AAA (16 bytes), an i32, and another i64 out of the
+; va_list, summing pieces of them into %ret. The va_arg sequences below are
+; the front end's hand-expanded form for the musl va_list
+; (%struct.__va_list_tag = { current_saved_reg_area_pointer,
+; saved_reg_area_end_pointer, overflow_area_pointer }).
+define i32 @foo(i32 %xx, i32 %a, i32 %b, i32 %c, i32 %x, ...) #0 {
+entry:
+ %xx.addr = alloca i32, align 4
+ %a.addr = alloca i32, align 4
+ %b.addr = alloca i32, align 4
+ %c.addr = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ %ap = alloca [1 x %struct.__va_list_tag], align 8
+ %d = alloca i32, align 4
+ %ret = alloca i32, align 4
+ %bbb = alloca %struct.AAA, align 4
+ store i32 %xx, i32* %xx.addr, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 %b, i32* %b.addr, align 4
+ store i32 %c, i32* %c.addr, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 0, i32* %ret, align 4
+ %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ %arraydecay1 = bitcast %struct.__va_list_tag* %arraydecay to i8*
+ call void @llvm.va_start(i8* %arraydecay1)
+ %arraydecay2 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ br label %vaarg.maybe_reg
+
+; First va_arg: i64. Round the register-save cursor up to 8 bytes
+; (add 7, mask with -8), then take the value from the register-save
+; area unless the 8-byte slot would run past its end pointer.
+vaarg.maybe_reg: ; preds = %entry
+ %__current_saved_reg_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 0
+ %__current_saved_reg_area_pointer = load i8*, i8** %__current_saved_reg_area_pointer_p
+ %__saved_reg_area_end_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 1
+ %__saved_reg_area_end_pointer = load i8*, i8** %__saved_reg_area_end_pointer_p
+ %0 = ptrtoint i8* %__current_saved_reg_area_pointer to i32
+ %align_current_saved_reg_area_pointer = add i32 %0, 7
+ %align_current_saved_reg_area_pointer3 = and i32 %align_current_saved_reg_area_pointer, -8
+ %align_current_saved_reg_area_pointer4 = inttoptr i32 %align_current_saved_reg_area_pointer3 to i8*
+ %__new_saved_reg_area_pointer = getelementptr i8, i8* %align_current_saved_reg_area_pointer4, i32 8
+ %1 = icmp sgt i8* %__new_saved_reg_area_pointer, %__saved_reg_area_end_pointer
+ br i1 %1, label %vaarg.on_stack, label %vaarg.in_reg
+
+vaarg.in_reg: ; preds = %vaarg.maybe_reg
+ %2 = bitcast i8* %align_current_saved_reg_area_pointer4 to i64*
+ store i8* %__new_saved_reg_area_pointer, i8** %__current_saved_reg_area_pointer_p
+ br label %vaarg.end
+
+; Overflow path: the same 8-byte alignment is applied to the overflow
+; (stack) area pointer before reading the i64 from the stack.
+vaarg.on_stack: ; preds = %vaarg.maybe_reg
+ %__overflow_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 2
+ %__overflow_area_pointer = load i8*, i8** %__overflow_area_pointer_p
+ %3 = ptrtoint i8* %__overflow_area_pointer to i32
+ %align_overflow_area_pointer = add i32 %3, 7
+ %align_overflow_area_pointer5 = and i32 %align_overflow_area_pointer, -8
+ %align_overflow_area_pointer6 = inttoptr i32 %align_overflow_area_pointer5 to i8*
+ %__overflow_area_pointer.next = getelementptr i8, i8* %align_overflow_area_pointer6, i32 8
+ store i8* %__overflow_area_pointer.next, i8** %__overflow_area_pointer_p
+ ; NOTE(review): the overflow path also advances
+ ; __current_saved_reg_area_pointer — presumably so subsequent in-reg
+ ; checks keep failing once spilling to the stack has started; confirm
+ ; against the Hexagon musl va_list scheme.
+ store i8* %__overflow_area_pointer.next, i8** %__current_saved_reg_area_pointer_p
+ %4 = bitcast i8* %align_overflow_area_pointer6 to i64*
+ br label %vaarg.end
+
+vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg
+ %vaarg.addr = phi i64* [ %2, %vaarg.in_reg ], [ %4, %vaarg.on_stack ]
+ %5 = load i64, i64* %vaarg.addr
+ %conv = trunc i64 %5 to i32
+ store i32 %conv, i32* %d, align 4
+ %6 = load i32, i32* %d, align 4
+ %7 = load i32, i32* %ret, align 4
+ %add = add nsw i32 %7, %6
+ store i32 %add, i32* %ret, align 4
+ ; Variadic by-value %struct.AAA: always fetched from the overflow area
+ ; (16-byte advance), then copied into the local %bbb.
+ %arraydecay7 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ %__overflow_area_pointer_p8 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay7, i32 0, i32 2
+ %__overflow_area_pointer9 = load i8*, i8** %__overflow_area_pointer_p8
+ %8 = bitcast i8* %__overflow_area_pointer9 to %struct.AAA*
+ %__overflow_area_pointer.next10 = getelementptr i8, i8* %__overflow_area_pointer9, i32 16
+ store i8* %__overflow_area_pointer.next10, i8** %__overflow_area_pointer_p8
+ %9 = bitcast %struct.AAA* %bbb to i8*
+ %10 = bitcast %struct.AAA* %8 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %9, i8* %10, i32 16, i32 4, i1 false)
+ %d11 = getelementptr inbounds %struct.AAA, %struct.AAA* %bbb, i32 0, i32 3
+ %11 = load i32, i32* %d11, align 4
+ %12 = load i32, i32* %ret, align 4
+ %add12 = add nsw i32 %12, %11
+ store i32 %add12, i32* %ret, align 4
+ %arraydecay13 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ br label %vaarg.maybe_reg14
+
+; Second va_arg: i32 — a 4-byte slot, no extra alignment of the cursor.
+vaarg.maybe_reg14: ; preds = %vaarg.end
+ %__current_saved_reg_area_pointer_p15 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 0
+ %__current_saved_reg_area_pointer16 = load i8*, i8** %__current_saved_reg_area_pointer_p15
+ %__saved_reg_area_end_pointer_p17 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 1
+ %__saved_reg_area_end_pointer18 = load i8*, i8** %__saved_reg_area_end_pointer_p17
+ %__new_saved_reg_area_pointer19 = getelementptr i8, i8* %__current_saved_reg_area_pointer16, i32 4
+ %13 = icmp sgt i8* %__new_saved_reg_area_pointer19, %__saved_reg_area_end_pointer18
+ br i1 %13, label %vaarg.on_stack21, label %vaarg.in_reg20
+
+vaarg.in_reg20: ; preds = %vaarg.maybe_reg14
+ %14 = bitcast i8* %__current_saved_reg_area_pointer16 to i32*
+ store i8* %__new_saved_reg_area_pointer19, i8** %__current_saved_reg_area_pointer_p15
+ br label %vaarg.end25
+
+vaarg.on_stack21: ; preds = %vaarg.maybe_reg14
+ %__overflow_area_pointer_p22 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 2
+ %__overflow_area_pointer23 = load i8*, i8** %__overflow_area_pointer_p22
+ %__overflow_area_pointer.next24 = getelementptr i8, i8* %__overflow_area_pointer23, i32 4
+ store i8* %__overflow_area_pointer.next24, i8** %__overflow_area_pointer_p22
+ store i8* %__overflow_area_pointer.next24, i8** %__current_saved_reg_area_pointer_p15
+ %15 = bitcast i8* %__overflow_area_pointer23 to i32*
+ br label %vaarg.end25
+
+vaarg.end25: ; preds = %vaarg.on_stack21, %vaarg.in_reg20
+ %vaarg.addr26 = phi i32* [ %14, %vaarg.in_reg20 ], [ %15, %vaarg.on_stack21 ]
+ %16 = load i32, i32* %vaarg.addr26
+ store i32 %16, i32* %d, align 4
+ %17 = load i32, i32* %d, align 4
+ %18 = load i32, i32* %ret, align 4
+ %add27 = add nsw i32 %18, %17
+ store i32 %add27, i32* %ret, align 4
+ %arraydecay28 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ br label %vaarg.maybe_reg29
+
+; Third va_arg: another i64, same 8-byte round-up as the first one.
+vaarg.maybe_reg29: ; preds = %vaarg.end25
+ %__current_saved_reg_area_pointer_p30 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay28, i32 0, i32 0
+ %__current_saved_reg_area_pointer31 = load i8*, i8** %__current_saved_reg_area_pointer_p30
+ %__saved_reg_area_end_pointer_p32 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay28, i32 0, i32 1
+ %__saved_reg_area_end_pointer33 = load i8*, i8** %__saved_reg_area_end_pointer_p32
+ %19 = ptrtoint i8* %__current_saved_reg_area_pointer31 to i32
+ %align_current_saved_reg_area_pointer34 = add i32 %19, 7
+ %align_current_saved_reg_area_pointer35 = and i32 %align_current_saved_reg_area_pointer34, -8
+ %align_current_saved_reg_area_pointer36 = inttoptr i32 %align_current_saved_reg_area_pointer35 to i8*
+ %__new_saved_reg_area_pointer37 = getelementptr i8, i8* %align_current_saved_reg_area_pointer36, i32 8
+ %20 = icmp sgt i8* %__new_saved_reg_area_pointer37, %__saved_reg_area_end_pointer33
+ br i1 %20, label %vaarg.on_stack39, label %vaarg.in_reg38
+
+vaarg.in_reg38: ; preds = %vaarg.maybe_reg29
+ %21 = bitcast i8* %align_current_saved_reg_area_pointer36 to i64*
+ store i8* %__new_saved_reg_area_pointer37, i8** %__current_saved_reg_area_pointer_p30
+ br label %vaarg.end46
+
+vaarg.on_stack39: ; preds = %vaarg.maybe_reg29
+ %__overflow_area_pointer_p40 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay28, i32 0, i32 2
+ %__overflow_area_pointer41 = load i8*, i8** %__overflow_area_pointer_p40
+ %22 = ptrtoint i8* %__overflow_area_pointer41 to i32
+ %align_overflow_area_pointer42 = add i32 %22, 7
+ %align_overflow_area_pointer43 = and i32 %align_overflow_area_pointer42, -8
+ %align_overflow_area_pointer44 = inttoptr i32 %align_overflow_area_pointer43 to i8*
+ %__overflow_area_pointer.next45 = getelementptr i8, i8* %align_overflow_area_pointer44, i32 8
+ store i8* %__overflow_area_pointer.next45, i8** %__overflow_area_pointer_p40
+ store i8* %__overflow_area_pointer.next45, i8** %__current_saved_reg_area_pointer_p30
+ %23 = bitcast i8* %align_overflow_area_pointer44 to i64*
+ br label %vaarg.end46
+
+vaarg.end46: ; preds = %vaarg.on_stack39, %vaarg.in_reg38
+ %vaarg.addr47 = phi i64* [ %21, %vaarg.in_reg38 ], [ %23, %vaarg.on_stack39 ]
+ %24 = load i64, i64* %vaarg.addr47
+ %conv48 = trunc i64 %24 to i32
+ store i32 %conv48, i32* %d, align 4
+ %25 = load i32, i32* %d, align 4
+ %26 = load i32, i32* %ret, align 4
+ %add49 = add nsw i32 %26, %25
+ store i32 %add49, i32* %ret, align 4
+ %arraydecay50 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ %arraydecay5051 = bitcast %struct.__va_list_tag* %arraydecay50 to i8*
+ call void @llvm.va_end(i8* %arraydecay5051)
+ %27 = load i32, i32* %ret, align 4
+ ret i32 %27
+}
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #1
+
+; Function Attrs: nounwind
+; Driver for the second test: five fixed i32 arguments fill the argument
+; registers, then the variadic part passes i64, a by-value %struct.AAA,
+; an i32, and a second i64 — matching the four va_arg reads in @foo.
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %x = alloca i32, align 4
+ %y = alloca i64, align 8
+ store i32 0, i32* %retval
+ store i64 1000000, i64* %y, align 8
+ %0 = load i64, i64* %y, align 8
+ %1 = load i64, i64* %y, align 8
+ %call = call i32 (i32, i32, i32, i32, i32, ...) @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i64 %0, %struct.AAA* byval align 4 @aaa, i32 4, i64 %1)
+ store i32 %call, i32* %x, align 4
+ %2 = load i32, i32* %x, align 4
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %2)
+ %3 = load i32, i32* %x, align 4
+ ret i32 %3
+}
+
+declare i32 @printf(i8*, ...) #2
+
+attributes #0 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}
--- /dev/null
+; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl -O0 < %s | FileCheck %s
+
+; CHECK-LABEL: foo:
+
+; Check Function prologue.
+; Note: all register numbers and offsets are fixed,
+; so there is no need for regular expressions.
+
+; CHECK: r29 = add(r29,#-16)
+; CHECK: r7:6 = memd(r29+#16)
+; CHECK: memd(r29+#0) = r7:6
+; CHECK: r7:6 = memd(r29+#24)
+; CHECK: memd(r29+#8) = r7:6
+; CHECK: r7:6 = memd(r29+#32)
+; CHECK: memd(r29+#16) = r7:6
+; CHECK: r7:6 = memd(r29+#40)
+; CHECK: memd(r29+#24) = r7:6
+; CHECK: memw(r29+#36) = r3
+; CHECK: memw(r29+#40) = r4
+; CHECK: memw(r29+#44) = r5
+; CHECK: r29 = add(r29,#16)
+
+%struct.AAA = type { i32, i32, i32, i32 }
+%struct.__va_list_tag = type { i8*, i8*, i8* }
+
+@aaa = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4
+@xxx = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4
+@yyy = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4
+@ccc = global %struct.AAA { i32 10, i32 20, i32 30, i32 40 }, align 4
+@fff = global %struct.AAA { i32 1, i32 2, i32 3, i32 4 }, align 4
+@.str = private unnamed_addr constant [13 x i8] c"result = %d\0A\00", align 1
+
+; Function Attrs: nounwind
+; Variadic callee whose fixed arguments include two by-value %struct.AAA
+; parameters (%bbb, %GGG). From the va_list it reads, in order: an i32,
+; three by-value %struct.AAA values (copied into %ddd, %ggg, %nnn), and a
+; final i32, accumulating fields into %ret. The expanded va_arg CFG uses
+; the musl va_list layout (%struct.__va_list_tag =
+; { current_saved_reg_area_pointer, saved_reg_area_end_pointer,
+;   overflow_area_pointer }).
+define i32 @foo(i32 %xx, i32 %z, i32 %m, %struct.AAA* byval align 4 %bbb, %struct.AAA* byval align 4 %GGG, ...) #0 {
+entry:
+ %xx.addr = alloca i32, align 4
+ %z.addr = alloca i32, align 4
+ %m.addr = alloca i32, align 4
+ %ap = alloca [1 x %struct.__va_list_tag], align 8
+ %d = alloca i32, align 4
+ %ret = alloca i32, align 4
+ %ddd = alloca %struct.AAA, align 4
+ %ggg = alloca %struct.AAA, align 4
+ %nnn = alloca %struct.AAA, align 4
+ store i32 %xx, i32* %xx.addr, align 4
+ store i32 %z, i32* %z.addr, align 4
+ store i32 %m, i32* %m.addr, align 4
+ store i32 0, i32* %ret, align 4
+ %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ %arraydecay1 = bitcast %struct.__va_list_tag* %arraydecay to i8*
+ call void @llvm.va_start(i8* %arraydecay1)
+ ; Use field 3 of the fixed byval %bbb and the fixed %z before touching
+ ; the variadic arguments.
+ %d2 = getelementptr inbounds %struct.AAA, %struct.AAA* %bbb, i32 0, i32 3
+ %0 = load i32, i32* %d2, align 4
+ %1 = load i32, i32* %ret, align 4
+ %add = add nsw i32 %1, %0
+ store i32 %add, i32* %ret, align 4
+ %2 = load i32, i32* %z.addr, align 4
+ %3 = load i32, i32* %ret, align 4
+ %add3 = add nsw i32 %3, %2
+ store i32 %add3, i32* %ret, align 4
+ %arraydecay4 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ br label %vaarg.maybe_reg
+
+; First va_arg: i32 — take a 4-byte slot from the register-save area
+; unless it would run past the end pointer, else fall back to the
+; overflow (stack) area.
+vaarg.maybe_reg: ; preds = %entry
+ %__current_saved_reg_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay4, i32 0, i32 0
+ %__current_saved_reg_area_pointer = load i8*, i8** %__current_saved_reg_area_pointer_p
+ %__saved_reg_area_end_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay4, i32 0, i32 1
+ %__saved_reg_area_end_pointer = load i8*, i8** %__saved_reg_area_end_pointer_p
+ %__new_saved_reg_area_pointer = getelementptr i8, i8* %__current_saved_reg_area_pointer, i32 4
+ %4 = icmp sgt i8* %__new_saved_reg_area_pointer, %__saved_reg_area_end_pointer
+ br i1 %4, label %vaarg.on_stack, label %vaarg.in_reg
+
+vaarg.in_reg: ; preds = %vaarg.maybe_reg
+ %5 = bitcast i8* %__current_saved_reg_area_pointer to i32*
+ store i8* %__new_saved_reg_area_pointer, i8** %__current_saved_reg_area_pointer_p
+ br label %vaarg.end
+
+vaarg.on_stack: ; preds = %vaarg.maybe_reg
+ %__overflow_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay4, i32 0, i32 2
+ %__overflow_area_pointer = load i8*, i8** %__overflow_area_pointer_p
+ %__overflow_area_pointer.next = getelementptr i8, i8* %__overflow_area_pointer, i32 4
+ store i8* %__overflow_area_pointer.next, i8** %__overflow_area_pointer_p
+ store i8* %__overflow_area_pointer.next, i8** %__current_saved_reg_area_pointer_p
+ %6 = bitcast i8* %__overflow_area_pointer to i32*
+ br label %vaarg.end
+
+vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg
+ %vaarg.addr = phi i32* [ %5, %vaarg.in_reg ], [ %6, %vaarg.on_stack ]
+ %7 = load i32, i32* %vaarg.addr
+ store i32 %7, i32* %d, align 4
+ %8 = load i32, i32* %d, align 4
+ %9 = load i32, i32* %ret, align 4
+ %add5 = add nsw i32 %9, %8
+ store i32 %add5, i32* %ret, align 4
+ ; Three variadic by-value %struct.AAA reads: each comes straight from
+ ; the overflow area (16-byte advance) and is memcpy'd into a local.
+ %arraydecay6 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ %__overflow_area_pointer_p7 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay6, i32 0, i32 2
+ %__overflow_area_pointer8 = load i8*, i8** %__overflow_area_pointer_p7
+ %10 = bitcast i8* %__overflow_area_pointer8 to %struct.AAA*
+ %__overflow_area_pointer.next9 = getelementptr i8, i8* %__overflow_area_pointer8, i32 16
+ store i8* %__overflow_area_pointer.next9, i8** %__overflow_area_pointer_p7
+ %11 = bitcast %struct.AAA* %ddd to i8*
+ %12 = bitcast %struct.AAA* %10 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %11, i8* %12, i32 16, i32 4, i1 false)
+ %d10 = getelementptr inbounds %struct.AAA, %struct.AAA* %ddd, i32 0, i32 3
+ %13 = load i32, i32* %d10, align 4
+ %14 = load i32, i32* %ret, align 4
+ %add11 = add nsw i32 %14, %13
+ store i32 %add11, i32* %ret, align 4
+ %arraydecay12 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ %__overflow_area_pointer_p13 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay12, i32 0, i32 2
+ %__overflow_area_pointer14 = load i8*, i8** %__overflow_area_pointer_p13
+ %15 = bitcast i8* %__overflow_area_pointer14 to %struct.AAA*
+ %__overflow_area_pointer.next15 = getelementptr i8, i8* %__overflow_area_pointer14, i32 16
+ store i8* %__overflow_area_pointer.next15, i8** %__overflow_area_pointer_p13
+ %16 = bitcast %struct.AAA* %ggg to i8*
+ %17 = bitcast %struct.AAA* %15 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %16, i8* %17, i32 16, i32 4, i1 false)
+ %d16 = getelementptr inbounds %struct.AAA, %struct.AAA* %ggg, i32 0, i32 3
+ %18 = load i32, i32* %d16, align 4
+ %19 = load i32, i32* %ret, align 4
+ %add17 = add nsw i32 %19, %18
+ store i32 %add17, i32* %ret, align 4
+ %arraydecay18 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ %__overflow_area_pointer_p19 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay18, i32 0, i32 2
+ %__overflow_area_pointer20 = load i8*, i8** %__overflow_area_pointer_p19
+ %20 = bitcast i8* %__overflow_area_pointer20 to %struct.AAA*
+ %__overflow_area_pointer.next21 = getelementptr i8, i8* %__overflow_area_pointer20, i32 16
+ store i8* %__overflow_area_pointer.next21, i8** %__overflow_area_pointer_p19
+ %21 = bitcast %struct.AAA* %nnn to i8*
+ %22 = bitcast %struct.AAA* %20 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %21, i8* %22, i32 16, i32 4, i1 false)
+ %d22 = getelementptr inbounds %struct.AAA, %struct.AAA* %nnn, i32 0, i32 3
+ %23 = load i32, i32* %d22, align 4
+ %24 = load i32, i32* %ret, align 4
+ %add23 = add nsw i32 %24, %23
+ store i32 %add23, i32* %ret, align 4
+ %arraydecay24 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ br label %vaarg.maybe_reg25
+
+; Final va_arg: i32, same 4-byte in-reg/on-stack split as the first.
+vaarg.maybe_reg25: ; preds = %vaarg.end
+ %__current_saved_reg_area_pointer_p26 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay24, i32 0, i32 0
+ %__current_saved_reg_area_pointer27 = load i8*, i8** %__current_saved_reg_area_pointer_p26
+ %__saved_reg_area_end_pointer_p28 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay24, i32 0, i32 1
+ %__saved_reg_area_end_pointer29 = load i8*, i8** %__saved_reg_area_end_pointer_p28
+ %__new_saved_reg_area_pointer30 = getelementptr i8, i8* %__current_saved_reg_area_pointer27, i32 4
+ %25 = icmp sgt i8* %__new_saved_reg_area_pointer30, %__saved_reg_area_end_pointer29
+ br i1 %25, label %vaarg.on_stack32, label %vaarg.in_reg31
+
+vaarg.in_reg31: ; preds = %vaarg.maybe_reg25
+ %26 = bitcast i8* %__current_saved_reg_area_pointer27 to i32*
+ store i8* %__new_saved_reg_area_pointer30, i8** %__current_saved_reg_area_pointer_p26
+ br label %vaarg.end36
+
+vaarg.on_stack32: ; preds = %vaarg.maybe_reg25
+ %__overflow_area_pointer_p33 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay24, i32 0, i32 2
+ %__overflow_area_pointer34 = load i8*, i8** %__overflow_area_pointer_p33
+ %__overflow_area_pointer.next35 = getelementptr i8, i8* %__overflow_area_pointer34, i32 4
+ store i8* %__overflow_area_pointer.next35, i8** %__overflow_area_pointer_p33
+ store i8* %__overflow_area_pointer.next35, i8** %__current_saved_reg_area_pointer_p26
+ %27 = bitcast i8* %__overflow_area_pointer34 to i32*
+ br label %vaarg.end36
+
+vaarg.end36: ; preds = %vaarg.on_stack32, %vaarg.in_reg31
+ %vaarg.addr37 = phi i32* [ %26, %vaarg.in_reg31 ], [ %27, %vaarg.on_stack32 ]
+ %28 = load i32, i32* %vaarg.addr37
+ store i32 %28, i32* %d, align 4
+ %29 = load i32, i32* %d, align 4
+ %30 = load i32, i32* %ret, align 4
+ %add38 = add nsw i32 %30, %29
+ store i32 %add38, i32* %ret, align 4
+ %31 = load i32, i32* %m.addr, align 4
+ %32 = load i32, i32* %ret, align 4
+ %add39 = add nsw i32 %32, %31
+ store i32 %add39, i32* %ret, align 4
+ %arraydecay40 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+ %arraydecay4041 = bitcast %struct.__va_list_tag* %arraydecay40 to i8*
+ call void @llvm.va_end(i8* %arraydecay4041)
+ %33 = load i32, i32* %ret, align 4
+ ret i32 %33
+}
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) #1
+
+; Function Attrs: nounwind
+; Driver for the third test: two fixed byval %struct.AAA arguments
+; (@aaa, @fff) followed by a variadic tail of i32, three more byval
+; %struct.AAA globals (@xxx, @yyy, @ccc) and a final i32 — matching the
+; va_arg sequence in @foo.
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %x = alloca i32, align 4
+ store i32 0, i32* %retval
+ %call = call i32 (i32, i32, i32, %struct.AAA*, %struct.AAA*, ...) @foo(i32 1, i32 3, i32 5, %struct.AAA* byval align 4 @aaa, %struct.AAA* byval align 4 @fff, i32 2, %struct.AAA* byval align 4 @xxx, %struct.AAA* byval align 4 @yyy, %struct.AAA* byval align 4 @ccc, i32 4)
+ store i32 %call, i32* %x, align 4
+ %0 = load i32, i32* %x, align 4
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %0)
+ %1 = load i32, i32* %x, align 4
+ ret i32 %1
+}
+
+declare i32 @printf(i8*, ...) #2
+
+attributes #0 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}