From 2b553d488fbeaaa909f68858e435df75fdbe2fc4 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 1 Aug 2014 09:17:39 +0000 Subject: [PATCH] [mips][PR19612] Fix va_arg for big-endian mode. Summary: Big-endian mode was not correctly adjusting the offset for types smaller than an ABI slot. Fixes PR19612 Reviewers: dsanders Reviewed By: dsanders Subscribers: sstankovic, llvm-commits Differential Revision: http://reviews.llvm.org/D4556 llvm-svn: 214493 --- llvm/lib/Target/Mips/MipsISelLowering.cpp | 69 +- llvm/lib/Target/Mips/MipsISelLowering.h | 1 + .../Mips/cconv/arguments-hard-float-varargs.ll | 22 +- llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll | 1104 ++++++++++++++++++++ 4 files changed, 1185 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index bfecb96..db31ea8 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -251,7 +251,6 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM, setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); - setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); @@ -343,7 +342,8 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM, setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Custom); setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); @@ -392,6 +392,11 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM, 
setMinFunctionAlignment(Subtarget.isGP64bit() ? 3 : 2); + // The arguments on the stack are defined in terms of 4-byte slots on O32 + // and 8-byte slots on N32/N64. + setMinStackArgumentAlignment( + (Subtarget.isABI_N32() || Subtarget.isABI_N64()) ? 8 : 4); + setStackPointerRegisterToSaveRestore(Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP); @@ -792,6 +797,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG); case ISD::SETCC: return lowerSETCC(Op, DAG); case ISD::VASTART: return lowerVASTART(Op, DAG); + case ISD::VAARG: return lowerVAARG(Op, DAG); case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG); case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); @@ -1755,6 +1761,65 @@ SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV), false, false, 0); } +SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + SDValue Chain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + unsigned Align = Node->getConstantOperandVal(3); + const Value *SV = cast(Node->getOperand(2))->getValue(); + SDLoc DL(Node); + unsigned ArgSlotSizeInBytes = + (Subtarget.isABI_N32() || Subtarget.isABI_N64()) ? 8 : 4; + + SDValue VAListLoad = DAG.getLoad(getPointerTy(), DL, Chain, VAListPtr, + MachinePointerInfo(SV), false, false, false, + 0); + SDValue VAList = VAListLoad; + + // Re-align the pointer if necessary. + // It should only ever be necessary for 64-bit types on O32 since the minimum + // argument alignment is the same as the maximum type alignment for N32/N64. + // + // FIXME: We currently align too often. The code generator doesn't notice + // when the pointer is still aligned from the last va_arg (or pair of + // va_args for the i64 on O32 case). 
+ if (Align > getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, + DAG.getConstant(Align - 1, + VAList.getValueType())); + + VAList = DAG.getNode(ISD::AND, DL, VAList.getValueType(), VAList, + DAG.getConstant(-(int64_t)Align, + VAList.getValueType())); + } + + // Increment the pointer, VAList, to the next vaarg. + unsigned ArgSizeInBytes = getDataLayout()->getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); + SDValue Tmp3 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, + DAG.getConstant(RoundUpToAlignment(ArgSizeInBytes, ArgSlotSizeInBytes), + VAList.getValueType())); + // Store the incremented VAList to the legalized pointer + Chain = DAG.getStore(VAListLoad.getValue(1), DL, Tmp3, VAListPtr, + MachinePointerInfo(SV), false, false, 0); + + // In big-endian mode we must adjust the pointer when the load size is smaller + // than the argument slot size. We must also reduce the known alignment to + // match. For example in the N64 ABI, we must add 4 bytes to the offset to get + // the correct half of the slot, and reduce the alignment from 8 (slot + // alignment) down to 4 (type alignment). 
+ if (!Subtarget.isLittle() && ArgSizeInBytes < ArgSlotSizeInBytes) { + unsigned Adjustment = ArgSlotSizeInBytes - ArgSizeInBytes; + VAList = DAG.getNode(ISD::ADD, DL, VAListPtr.getValueType(), VAList, + DAG.getIntPtrConstant(Adjustment)); + } + // Load the actual argument out of the pointer VAList + return DAG.getLoad(VT, DL, Chain, VAList, MachinePointerInfo(), false, false, + false, 0); +} + static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasExtractInsert) { EVT TyX = Op.getOperand(0).getValueType(); diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 10e4e0b..e033059 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -482,6 +482,7 @@ namespace llvm { SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll b/llvm/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll index aadf7d1..70ccf14 100644 --- a/llvm/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll +++ b/llvm/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll @@ -4,11 +4,11 @@ ; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s ; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s -; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s -; RUN: llc 
-march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s +; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWBE %s +; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWLE %s -; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s -; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s +; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWBE %s +; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWLE %s ; Test the effect of varargs on floating point types in the non-variable part ; of the argument list as specified by section 2 of the MIPSpro N32 Handbook. @@ -34,6 +34,7 @@ entry: %b = va_arg i8** %ap, double %1 = getelementptr [11 x double]* @doubles, i32 0, i32 2 store volatile double %b, double* %1 + call void @llvm.va_end(i8* %ap2) ret void } @@ -98,6 +99,7 @@ entry: %b = va_arg i8** %ap, float %1 = getelementptr [11 x float]* @floats, i32 0, i32 2 store volatile float %b, float* %1 + call void @llvm.va_end(i8* %ap2) ret void } @@ -140,16 +142,18 @@ entry: ; Increment the pointer then get the varargs arg ; LLVM will rebind the load to the stack pointer instead of the varargs pointer ; during lowering. 
This is fine and doesn't change the behaviour. -; N32/N64 is using ori instead of addiu/daddiu but (although odd) this is fine -; since the stack is always aligned. +; Also, in big-endian mode the offset must be increased by 4 to retrieve the +; correct half of the argument slot. +; ; O32-DAG: addiu [[VAPTR]], [[VAPTR]], 4 ; O32-DAG: sw [[VAPTR]], 4($sp) -; N32-DAG: ori [[VAPTR]], [[VAPTR]], 4 +; N32-DAG: addiu [[VAPTR]], [[VAPTR]], 8 ; N32-DAG: sw [[VAPTR]], 4($sp) -; N64-DAG: ori [[VAPTR]], [[VAPTR]], 4 +; N64-DAG: daddiu [[VAPTR]], [[VAPTR]], 8 ; N64-DAG: sd [[VAPTR]], 0($sp) ; O32-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp) -; NEW-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 8($sp) +; NEWLE-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 8($sp) +; NEWBE-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp) ; ALL-DAG: swc1 [[FTMP1]], 8([[R2]]) declare void @llvm.va_start(i8*) diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll b/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll new file mode 100644 index 0000000..de369e7 --- /dev/null +++ b/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll @@ -0,0 +1,1104 @@ +; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-BE %s +; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-LE %s + +; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s +; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s + +; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N32 --check-prefix=NEW-BE %s +; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N32 --check-prefix=NEW-LE %s + +; RUN: llc -march=mips64 
-relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N64 --check-prefix=NEW-BE %s +; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N64 --check-prefix=NEW-LE %s + +@hwords = global [3 x i16] zeroinitializer, align 1 +@words = global [3 x i32] zeroinitializer, align 1 +@dwords = global [3 x i64] zeroinitializer, align 1 + +define void @fn_i16_dotdotdot_i16(i16 %a, ...) { +entry: +; ALL-LABEL: fn_i16_dotdotdot_i16: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). +; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) +; O32-DAG: sw $5, 12([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 12 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first +; fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. 
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]]) + +; Copy the arg to the global +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords) + +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)( + +; ALL-DAG: sh [[ARG1]], 2([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion. 
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]]) + +; Copy the arg to the global +; ALL-DAG: sh [[ARG2]], 4([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i16 + %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1 + store volatile i16 %arg1, i16* %e1, align 2 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i16 + %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2 + store volatile i16 %arg2, i16* %e2, align 2 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i16_dotdotdot_i32(i16 %a, ...) { +entry: +; ALL-LABEL: fn_i16_dotdotdot_i32: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). +; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) +; O32-DAG: sw $5, 12([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 12 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first +; fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. 
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]]) + +; Copy the arg to the global +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words) + +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)( + +; ALL-DAG: sw [[ARG1]], 4([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion. 
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]]) + +; Copy the arg to the global +; ALL-DAG: sw [[ARG2]], 8([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i32 + %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1 + store volatile i32 %arg1, i32* %e1, align 4 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i32 + %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2 + store volatile i32 %arg2, i32* %e2, align 4 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i16_dotdotdot_i64(i16 %a, ...) { +entry: +; ALL-LABEL: fn_i16_dotdotdot_i64: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). +; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) +; O32-DAG: sw $5, 12([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 12 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first +; fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. 
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] (and realign pointer for O32) +; O32: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7 +; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8 +; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]] +; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion and copy it to the global. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG1]], 8([[GV]]) +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG1]], 12([[GV]]) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)( +; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-DAG: sd [[ARG1]], 8([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; FIXME: We're still aligned from the last one but CodeGen doesn't spot that. 
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7 +; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]] +; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion and copy it to the global. +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG2]], 16([[GV]]) +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG2]], 20([[GV]]) + +; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-DAG: sd [[ARG2]], 16([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i64 + %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1 + store volatile i64 %arg1, i64* %e1, align 8 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i64 + %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2 + store volatile i64 %arg2, i64* %e2, align 8 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i32_dotdotdot_i16(i32 %a, ...) { +entry: +; ALL-LABEL: fn_i32_dotdotdot_i16: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). 
+; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) +; O32-DAG: sw $5, 12([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 12 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first +; fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. +; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. 
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]]) + +; Copy the arg to the global +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords) + +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)( + +; ALL-DAG: sh [[ARG1]], 2([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion. +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]]) + +; Copy the arg to the global +; ALL-DAG: sh [[ARG2]], 4([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i16 + %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1 + store volatile i16 %arg1, i16* %e1, align 2 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i16 + %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2 + store volatile i16 %arg2, i16* %e2, align 2 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i32_dotdotdot_i32(i32 %a, ...) { +entry: +; ALL-LABEL: fn_i32_dotdotdot_i32: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). 
+; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) +; O32-DAG: sw $5, 12([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 12 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first +; fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. +; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. 
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]]) + +; Copy the arg to the global +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words) + +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)( + +; ALL-DAG: sw [[ARG1]], 4([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion. +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]]) + +; Copy the arg to the global +; ALL-DAG: sw [[ARG2]], 8([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i32 + %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1 + store volatile i32 %arg1, i32* %e1, align 4 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i32 + %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2 + store volatile i32 %arg2, i32* %e2, align 4 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i32_dotdotdot_i64(i32 %a, ...) { +entry: +; ALL-LABEL: fn_i32_dotdotdot_i64: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). 
+; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) +; O32-DAG: sw $5, 12([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 12 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first +; fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. +; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] (and realign pointer for O32) +; O32: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7 +; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8 +; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]] +; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion and copy it to the global. +; This has used the stack pointer directly rather than the [[VA]] we just set +; up. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. 
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG1]], 8([[GV]]) +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG1]], 12([[GV]]) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)( +; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-DAG: sd [[ARG1]], 8([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; FIXME: We're still aligned from the last one but CodeGen doesn't spot that. +; O32: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7 +; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]] +; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion and copy it to the global. 
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG2]], 16([[GV]]) +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG2]], 20([[GV]]) + +; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-DAG: sd [[ARG2]], 16([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i64 + %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1 + store volatile i64 %arg1, i64* %e1, align 8 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i64 + %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2 + store volatile i64 %arg2, i64* %e2, align 8 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i64_dotdotdot_i16(i64 %a, ...) { +entry: +; ALL-LABEL: fn_i64_dotdotdot_i16: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). +; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 16 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the +; first fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. 
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion. +; The load is expected to go through the [[VA]] register read back from the +; va_list, not through an $sp-relative address. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]]) + +; Copy the arg to the global +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords) + +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)( + +; ALL-DAG: sh [[ARG1]], 2([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]]) + +; Copy the arg to the global +; ALL-DAG: sh [[ARG2]], 4([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i16 + %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1 + store volatile i16 %arg1, i16* %e1, align 2 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i16 + %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2 + store volatile i16 %arg2, i16* %e2, align 2 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i64_dotdotdot_i32(i64 %a, ...) { +entry: +; ALL-LABEL: fn_i64_dotdotdot_i32: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). +; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 16 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the +; first fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. 
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion. +; The load is expected to go through the [[VA]] register read back from the +; va_list, not through an $sp-relative address. +; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte +; order. +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]]) + +; Copy the arg to the global +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words) + +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)( + +; ALL-DAG: sw [[ARG1]], 4([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) + +; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]]) + +; Copy the arg to the global +; ALL-DAG: sw [[ARG2]], 8([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i32 + %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1 + store volatile i32 %arg1, i32* %e1, align 4 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i32 + %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2 + store volatile i32 %arg2, i32* %e2, align 4 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +define void @fn_i64_dotdotdot_i64(i64 %a, ...) { +entry: +; ALL-LABEL: fn_i64_dotdotdot_i64: + +; Set up the stack with an 8-byte local area. N32/N64 must also make room for +; the argument save area (56 bytes). +; O32: addiu [[SP:\$sp]], $sp, -8 +; N32: addiu [[SP:\$sp]], $sp, -64 +; N64: daddiu [[SP:\$sp]], $sp, -64 + +; Save variable argument portion on the stack +; O32-DAG: sw $7, 20([[SP]]) +; O32-DAG: sw $6, 16([[SP]]) + +; NEW-DAG: sd $11, 56([[SP]]) +; NEW-DAG: sd $10, 48([[SP]]) +; NEW-DAG: sd $9, 40([[SP]]) +; NEW-DAG: sd $8, 32([[SP]]) +; NEW-DAG: sd $7, 24([[SP]]) +; NEW-DAG: sd $6, 16([[SP]]) +; NEW-DAG: sd $5, 8([[SP]]) + +; Initialize variable argument pointer. +; For O32, the offset is 16 due to the 4 bytes used to store local variables, +; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the +; first fixed argument. +; For N32/N64, it is only 8 since the fixed arguments do not reserve stack +; space. 
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16 +; O32-DAG: sw [[VA]], 0([[SP]]) + +; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8 +; N32-DAG: sw [[VA]], 0([[SP]]) + +; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8 +; N64-DAG: sd [[VA]], 0([[SP]]) + +; Store [[VA]] +; O32-DAG: sw [[VA]], 0([[SP]]) + +; ALL: # ANCHOR1 + +; Increment [[VA]] (and realign pointer for O32) +; O32: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7 +; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8 +; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]] +; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N32-DAG: sw [[VA2]], 0([[SP]]) + +; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA2:\$[0-9]+]], [[VA]], 8 +; N64-DAG: sd [[VA2]], 0([[SP]]) + +; Load the first argument from the variable portion and copy it to the global. +; The loads are expected to go through the [[VA]] register read back from the +; va_list, not through an $sp-relative address. +; On N32/N64 the i64 fills the whole 8-byte slot, so the NEW checks below use ld +; at offset 0 with no separate LE/BE variants. +; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG1]], 8([[GV]]) +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG1]], 12([[GV]]) + +; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) +; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)( +; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]]) +; NEW-DAG: sd [[ARG1]], 8([[GV]]) + +; ALL: # ANCHOR2 + +; Increment [[VA]] again. +; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7 +; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]] +; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) + +; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]]) +; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N32-DAG: sw [[VA3]], 0([[SP]]) + +; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]]) +; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8 +; N64-DAG: sd [[VA3]], 0([[SP]]) + +; Load the second argument from the variable portion and copy it to the global. +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG2]], 16([[GV]]) +; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) +; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: sw [[VA2]], 0([[SP]]) +; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: sw [[ARG2]], 20([[GV]]) + +; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]]) +; NEW-DAG: sd [[ARG2]], 16([[GV]]) + + %ap = alloca i8*, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap2) + + call void asm sideeffect "# ANCHOR1", ""() + %arg1 = va_arg i8** %ap, i64 + %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1 + store volatile i64 %arg1, i64* %e1, align 8 + + call void asm sideeffect "# ANCHOR2", ""() + %arg2 = va_arg i8** %ap, i64 + %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2 + store volatile i64 %arg2, i64* %e2, align 8 + + call void @llvm.va_end(i8* %ap2) + + ret void +} + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) -- 2.7.4