[AIX] Implement caller arguments passed in stack memory.

author Chris Bowler <cebowleratibm@gmail.com>

Thu, 6 Feb 2020 16:19:35 +0000 (11:19 -0500)

committer Sean Fertile <sd.fertile@gmail.com>

Thu, 6 Feb 2020 17:07:34 +0000 (12:07 -0500)
author Chris Bowler <cebowleratibm@gmail.com>
Thu, 6 Feb 2020 16:19:35 +0000 (11:19 -0500)
committer Sean Fertile <sd.fertile@gmail.com>
Thu, 6 Feb 2020 17:07:34 +0000 (12:07 -0500)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

index 2e086fa..c7e9b5f 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6838,10 +6838,10 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
      assert(IsPPC64 && "PPC32 should have split i64 values.");
      LLVM_FALLTHROUGH;
    case MVT::i1:
-  case MVT::i32:
-    State.AllocateStack(PtrByteSize, PtrByteSize);
+  case MVT::i32: {
+    const unsigned Offset = State.AllocateStack(PtrByteSize, PtrByteSize);
+    const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
-      MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
        // Promote integers if needed.
        if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
          LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
@@ -6849,38 +6849,46 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
      }
      else
-      report_fatal_error("Handling of placing parameters on the stack is "
-                         "unimplemented!");
-    return false;
+      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
  
+    return false;
+  }
    case MVT::f32:
    case MVT::f64: {
      // Parameter save area (PSA) is reserved even if the float passes in fpr.
      const unsigned StoreSize = LocVT.getStoreSize();
      // Floats are always 4-byte aligned in the PSA on AIX.
      // This includes f64 in 64-bit mode for ABI compatibility.
-    State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4);
-    if (unsigned Reg = State.AllocateReg(FPR))
-      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-    else
-      report_fatal_error("Handling of placing parameters on the stack is "
-                         "unimplemented!");
+    const unsigned Offset = State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4);
+    unsigned FReg = State.AllocateReg(FPR);
+    if (FReg)
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
  
-    // AIX requires that GPRs are reserved for float arguments.
-    // Successfully reserved GPRs are only initialized for vararg calls.
-    MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+    // Reserve and initialize GPRs or initialize the PSA as required.
+    const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
      for (unsigned I = 0; I < StoreSize; I += PtrByteSize) {
        if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+        assert(FReg && "An FPR should be available when a GPR is reserved.");
          if (State.isVarArg()) {
+          // Successfully reserved GPRs are only initialized for vararg calls.
            // Custom handling is required for:
            //   f64 in PPC32 needs to be split into 2 GPRs.
            //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
            State.addLoc(
                CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
          }
-      } else if (State.isVarArg()) {
-        report_fatal_error("Handling of placing parameters on the stack is "
-                           "unimplemented!");
+      } else {
+        // If there are insufficient GPRs, the PSA needs to be initialized.
+        // For compatibility with the AIX XL compiler, this happens even if
+        // the argument was also passed in an FPR. The full memory for the
+        // argument is initialized even if a prior word was saved in a GPR.
+        // A custom memLoc is used when the argument also passes in an FPR so
+        // that the callee handling can skip over it easily.
+        State.addLoc(
+            FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
+                                             LocInfo)
+                 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+        break;
        }
      }
  
@@ -6963,27 +6971,36 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
    CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize);
    CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
  
-  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-    CCValAssign &VA = ArgLocs[i];
-    SDValue ArgValue;
-    ISD::ArgFlagsTy Flags = Ins[i].Flags;
-    if (VA.isRegLoc()) {
-      EVT ValVT = VA.getValVT();
-      MVT LocVT = VA.getLocVT();
-      MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
-      unsigned VReg =
-          MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
-      ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
-      if (ValVT.isScalarInteger() &&
-          (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
-        ArgValue =
-            truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
-      }
-      InVals.push_back(ArgValue);
-    } else {
-      report_fatal_error("Handling of formal arguments on the stack is "
-                         "unimplemented!");
+  for (CCValAssign &VA : ArgLocs) {
+
+    if (VA.isMemLoc()) {
+      // For compatibility with the AIX XL compiler, float args in the
+      // parameter save area are initialized even if the argument is also
+      // available in a register.  The caller is required to initialize both
+      // the register and memory; the callee, however, may choose to read the
+      // argument from either.  The memloc is skipped here because the argument
+      // is retrieved from the register.
+      if (VA.needsCustom())
+        continue;
+      report_fatal_error(
+          "Handling of formal arguments on the stack is unimplemented!");
+    }
+
+    assert(VA.isRegLoc() && "Unexpected argument location.");
+
+    EVT ValVT = VA.getValVT();
+    MVT LocVT = VA.getLocVT();
+    MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
+    unsigned VReg =
+        MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
+    SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+    if (ValVT.isScalarInteger() &&
+        (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
+      ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
+      ArgValue =
+          truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
      }
+    InVals.push_back(ArgValue);
    }
  
    // Area that is at least reserved in the caller of this function.
@@ -7035,6 +7052,7 @@ SDValue PPCTargetLowering::LowerCall_AIX(
    // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
    const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
    const bool IsPPC64 = Subtarget.isPPC64();
+  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
    const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
    CCInfo.AllocateStack(LinkageSize, PtrByteSize);
    CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
@@ -7046,7 +7064,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
    // conservatively assume that it is needed.  As such, make sure we have at
    // least enough stack space for the caller to store the 8 GPRs.
    const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
-  const unsigned NumBytes = LinkageSize + MinParameterSaveAreaSize;
+  const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
+                                     CCInfo.getNextStackOffset());
  
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass.
@@ -7054,20 +7073,23 @@ SDValue PPCTargetLowering::LowerCall_AIX(
    SDValue CallSeqStart = Chain;
  
    SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+
+  // Set up a copy of the stack pointer for loading and storing any
+  // arguments that may not fit in the registers available for argument
+  // passing.
+  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
+                                   : DAG.getRegister(PPC::R1, MVT::i32);
  
    for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
      CCValAssign &VA = ArgLocs[I++];
  
-    if (VA.isMemLoc())
-      report_fatal_error("Handling of placing parameters on the stack is "
-                         "unimplemented!");
-    if (!VA.isRegLoc())
-      report_fatal_error(
-          "Unexpected non-register location for function call argument.");
-
      SDValue Arg = OutVals[VA.getValNo()];
  
-    if (!VA.needsCustom()) {
+    if (!VA.isRegLoc() && !VA.isMemLoc())
+      report_fatal_error("Unexpected location for function call argument.");
+
+    if (VA.isRegLoc() && !VA.needsCustom()) {
        switch (VA.getLocInfo()) {
        default:
          report_fatal_error("Unexpected argument extension type.");
@@ -7085,11 +7107,21 @@ SDValue PPCTargetLowering::LowerCall_AIX(
        continue;
      }
  
+    if (VA.isMemLoc()) {
+      SDValue PtrOff =
+          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
+      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+      MemOpChains.push_back(
+          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
+
+      continue;
+    }
+
      // Custom handling is used for GPR initializations for vararg float
      // arguments.
-    assert(CFlags.IsVarArg && VA.getValVT().isFloatingPoint() &&
-           VA.getLocVT().isInteger() &&
-           "Unexpected custom register handling for calling convention.");
+    assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
+           VA.getValVT().isFloatingPoint() && VA.getLocVT().isInteger() &&
+           "Unexpected register handling for calling convention.");
  
      SDValue ArgAsInt =
          DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg);
@@ -7112,15 +7144,24 @@ SDValue PPCTargetLowering::LowerCall_AIX(
                                       DAG.getConstant(32, dl, MVT::i8));
        RegsToPass.push_back(std::make_pair(
            GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
-      assert(I != E && "A second custom GPR is expected!");
-      CCValAssign &GPR2 = ArgLocs[I++];
-      assert(GPR2.isRegLoc() && GPR2.getValNo() == GPR1.getValNo() &&
-             GPR2.needsCustom() && "A second custom GPR is expected!");
-      RegsToPass.push_back(std::make_pair(
-          GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
+
+      if (I != E) {
+        // If only 1 GPR was available, there will only be one custom GPR and
+        // the argument will also pass in memory.
+        CCValAssign &PeekArg = ArgLocs[I];
+        if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
+          assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
+          CCValAssign &GPR2 = ArgLocs[I++];
+          RegsToPass.push_back(std::make_pair(
+              GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
+        }
+      }
      }
    }
  
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
    // For indirect calls, we need to save the TOC base to the stack for
    // restoration after the call.
    if (CFlags.IsIndirect) {
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll

index 7ef16d7..83fd835 100644 (file)
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
@@ -447,41 +447,109 @@ entry:
  
  ; CHECK-LABEL: name: call_test_fpr_max{{.*}}
  
-; 32BIT:      renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got)
-; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1)
-; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
-; 32BIT-NEXT: $f2 = COPY renamable $f1
-; 32BIT-NEXT: $f3 = COPY renamable $f1
-; 32BIT-NEXT: $f4 = COPY renamable $f1
-; 32BIT-NEXT: $f5 = COPY renamable $f1
-; 32BIT-NEXT: $f6 = COPY renamable $f1
-; 32BIT-NEXT: $f7 = COPY renamable $f1
-; 32BIT-NEXT: $f8 = COPY renamable $f1
-; 32BIT-NEXT: $f9 = COPY renamable $f1
-; 32BIT-NEXT: $f10 = COPY renamable $f1
-; 32BIT-NEXT: $f11 = COPY renamable $f1
-; 32BIT-NEXT: $f12 = COPY renamable $f1
-; 32BIT-NEXT: $f13 = COPY renamable $f1
-; 32BIT-NEXT: BL_NOP <mcsymbol .test_fpr_max>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $r2, implicit-def $r1
-; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
-
-; 64BIT:      renamable $x3 = LDtoc @d1, $x2 :: (load 8 from got)
-; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x3 :: (dereferenceable load 8 from @d1)
-; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
-; 64BIT-NEXT: $f2 = COPY renamable $f1
-; 64BIT-NEXT: $f3 = COPY renamable $f1
-; 64BIT-NEXT: $f4 = COPY renamable $f1
-; 64BIT-NEXT: $f5 = COPY renamable $f1
-; 64BIT-NEXT: $f6 = COPY renamable $f1
-; 64BIT-NEXT: $f7 = COPY renamable $f1
-; 64BIT-NEXT: $f8 = COPY renamable $f1
-; 64BIT-NEXT: $f9 = COPY renamable $f1
-; 64BIT-NEXT: $f10 = COPY renamable $f1
-; 64BIT-NEXT: $f11 = COPY renamable $f1
-; 64BIT-NEXT: $f12 = COPY renamable $f1
-; 64BIT-NEXT: $f13 = COPY renamable $f1
+; 32BIT:      renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got)
+; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
+; 32BIT-NEXT: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-DAG:  STFD renamable $f1, 56, $r1 :: (store 8)
+; 32BIT-DAG:  STFD renamable $f1, 64, $r1 :: (store 8)
+; 32BIT-DAG:  STFD renamable $f1, 72, $r1 :: (store 8)
+; 32BIT-DAG:  STFD renamable $f1, 80, $r1 :: (store 8)
+; 32BIT-DAG:  STFD renamable $f1, 88, $r1 :: (store 8)
+; 32BIT-DAG:  STFD renamable $f1, 96, $r1 :: (store 8)
+; 32BIT-DAG:  STFD renamable $f1, 104, $r1 :: (store 8)
+; 32BIT-DAG:  STFD renamable $f1, 112, $r1 :: (store 8)
+; 32BIT-DAG:  STFD renamable $f1, 120, $r1 :: (store 8)
+; 32BIT-DAG:  $f2 = COPY renamable $f1
+; 32BIT-DAG:  $f3 = COPY renamable $f1
+; 32BIT-DAG:  $f4 = COPY renamable $f1
+; 32BIT-DAG:  $f5 = COPY renamable $f1
+; 32BIT-DAG:  $f6 = COPY renamable $f1
+; 32BIT-DAG:  $f7 = COPY renamable $f1
+; 32BIT-DAG:  $f8 = COPY renamable $f1
+; 32BIT-DAG:  $f9 = COPY renamable $f1
+; 32BIT-DAG:  $f10 = COPY renamable $f1
+; 32BIT-DAG:  $f11 = COPY renamable $f1
+; 32BIT-DAG:  $f12 = COPY renamable $f1
+; 32BIT-DAG:  $f13 = COPY renamable $f1
+; 32BIT-NEXT: BL_NOP <mcsymbol .test_fpr_max>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $r2, implicit-def $r1, implicit-def dead $f1
+; 32BIT-NEXT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_fpr_max:
+
+; ASM32PWR4:       stwu 1, -128(1)
+; ASM32PWR4-NEXT:  lwz [[REG:[0-9]+]], LC2(2)
+; ASM32PWR4-NEXT:  lfd 1, 0([[REG]])
+; ASM32PWR4-DAG:   stfd 1, 56(1)
+; ASM32PWR4-DAG:   stfd 1, 64(1)
+; ASM32PWR4-DAG:   stfd 1, 72(1)
+; ASM32PWR4-DAG:   stfd 1, 80(1)
+; ASM32PWR4-DAG:   stfd 1, 88(1)
+; ASM32PWR4-DAG:   stfd 1, 96(1)
+; ASM32PWR4-DAG:   stfd 1, 104(1)
+; ASM32PWR4-DAG:   stfd 1, 112(1)
+; ASM32PWR4-DAG:   stfd 1, 120(1)
+; ASM32PWR4-DAG:   fmr 2, 1
+; ASM32PWR4-DAG:   fmr 3, 1
+; ASM32PWR4-DAG:   fmr 4, 1
+; ASM32PWR4-DAG:   fmr 5, 1
+; ASM32PWR4-DAG:   fmr 6, 1
+; ASM32PWR4-DAG:   fmr 7, 1
+; ASM32PWR4-DAG:   fmr 8, 1
+; ASM32PWR4-DAG:   fmr 9, 1
+; ASM32PWR4-DAG:   fmr 10, 1
+; ASM32PWR4-DAG:   fmr 11, 1
+; ASM32PWR4-DAG:   fmr 12, 1
+; ASM32PWR4-DAG:   fmr 13, 1
+; ASM32PWR4-NEXT:  bl .test_fpr_max
+; ASM32PWR4-NEXT:  nop
+; ASM32PWR4-NEXT:  addi 1, 1, 128
+
+; 64BIT:      renamable $x[[REGD1ADDR:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got)
+; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x[[REGD1ADDR]] :: (dereferenceable load 8 from @d1)
+; 64BIT-NEXT: ADJCALLSTACKDOWN 152, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-DAG:  STFD renamable $f1, 112, $x1 :: (store 8)
+; 64BIT-DAG:  STFD renamable $f1, 120, $x1 :: (store 8)
+; 64BIT-DAG:  STFD renamable $f1, 128, $x1 :: (store 8)
+; 64BIT-DAG:  STFD renamable $f1, 136, $x1 :: (store 8)
+; 64BIT-DAG:  STFD renamable $f1, 144, $x1 :: (store 8)
+; 64BIT-DAG:  $f2 = COPY renamable $f1
+; 64BIT-DAG:  $f3 = COPY renamable $f1
+; 64BIT-DAG:  $f4 = COPY renamable $f1
+; 64BIT-DAG:  $f5 = COPY renamable $f1
+; 64BIT-DAG:  $f6 = COPY renamable $f1
+; 64BIT-DAG:  $f7 = COPY renamable $f1
+; 64BIT-DAG:  $f8 = COPY renamable $f1
+; 64BIT-DAG:  $f9 = COPY renamable $f1
+; 64BIT-DAG:  $f10 = COPY renamable $f1
+; 64BIT-DAG:  $f11 = COPY renamable $f1
+; 64BIT-DAG:  $f12 = COPY renamable $f1
+; 64BIT-DAG:  $f13 = COPY renamable $f1
  ; 64BIT-NEXT: BL8_NOP <mcsymbol .test_fpr_max>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $x2, implicit-def $r1
-; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 152, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4:       stdu 1, -160(1)
+; ASM64PWR4-NEXT:  ld [[REG:[0-9]+]], LC2(2)
+; ASM64PWR4-NEXT:  lfd 1, 0([[REG]])
+; ASM64PWR4-DAG:   stfd 1, 112(1)
+; ASM64PWR4-DAG:   stfd 1, 120(1)
+; ASM64PWR4-DAG:   stfd 1, 128(1)
+; ASM64PWR4-DAG:   stfd 1, 136(1)
+; ASM64PWR4-DAG:   stfd 1, 144(1)
+; ASM64PWR4-DAG:   fmr 2, 1
+; ASM64PWR4-DAG:   fmr 3, 1
+; ASM64PWR4-DAG:   fmr 4, 1
+; ASM64PWR4-DAG:   fmr 5, 1
+; ASM64PWR4-DAG:   fmr 6, 1
+; ASM64PWR4-DAG:   fmr 7, 1
+; ASM64PWR4-DAG:   fmr 8, 1
+; ASM64PWR4-DAG:   fmr 9, 1
+; ASM64PWR4-DAG:   fmr 10, 1
+; ASM64PWR4-DAG:   fmr 11, 1
+; ASM64PWR4-DAG:   fmr 12, 1
+; ASM64PWR4-DAG:   fmr 13, 1
+; ASM64PWR4-NEXT:  bl .test_fpr_max
+; ASM64PWR4-NEXT:  nop
+; ASM64PWR4-NEXT:  addi 1, 1, 160
  
  define double @test_fpr_max(double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13) {
  entry:
@@ -829,8 +897,8 @@ entry:
  ; ASM64PWR4-NEXT: lfd 2, 0([[REG]])
  ; ASM64PWR4-NEXT: li 3, 42
  ; ASM64PWR4-NEXT: stfd 2, 120(1)
-; ASM64PWR4-DAG: ld 4, 112(1)
-; ASM64PWR4-DAG: ld 6, 120(1)
+; ASM64PWR4-DAG:  ld 4, 112(1)
+; ASM64PWR4-DAG:  ld 6, 120(1)
  ; ASM64PWR4-NEXT: bl .test_vararg
  ; ASM64PWR4-NEXT: nop
  
@@ -878,3 +946,323 @@ entry:
  ; ASM64PWR4-NEXT: lwz 4, 124(1)
  ; ASM64PWR4-NEXT: bl .test_vararg
  ; ASM64PWR4-NEXT: nop
+
+@c = common global i8 0, align 1
+@si = common global i16 0, align 2
+@i = common global i32 0, align 4
+@lli = common global i64 0, align 8
+@f = common global float 0.000000e+00, align 4
+@d = common global double 0.000000e+00, align 8
+
+; Basic saving of integral type arguments to the parameter save area.
+define void @call_test_stackarg_int() {
+entry:
+  %0 = load i8, i8* @c, align 1
+  %1 = load i16, i16* @si, align 2
+  %2 = load i32, i32* @i, align 4
+  %3 = load i64, i64* @lli, align 8
+  %4 = load i32, i32* @i, align 4
+  call void @test_stackarg_int(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i8 zeroext %0, i16 signext %1, i32 %2, i64 %3, i32 %4)
+  ret void
+}
+
+declare void @test_stackarg_int(i32, i32, i32, i32, i32, i32, i32, i32, i8 zeroext, i16 signext, i32, i64, i32)
+
+; CHECK-LABEL:     name: call_test_stackarg_int{{.*}}
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 32BIT-DAG:  renamable $r[[REGCADDR:[0-9]+]] = LWZtoc @c, $r2 :: (load 4 from got)
+; 32BIT-DAG:  renamable $r[[REGC:[0-9]+]] = LBZ 0, killed renamable $r[[REGCADDR]] :: (dereferenceable load 1 from @c)
+; 32BIT-DAG:  renamable $r[[REGSIADDR:[0-9]+]] = LWZtoc @si, $r2 :: (load 4 from got)
+; 32BIT-DAG:  renamable $r[[REGSI:[0-9]+]] = LHA 0, killed renamable $r[[REGSIADDR]] :: (dereferenceable load 2 from @si)
+; 32BIT-DAG:  renamable $r[[REGIADDR:[0-9]+]] = LWZtoc @i, $r2 :: (load 4 from got)
+; 32BIT-DAG:  renamable $r[[REGI:[0-9]+]] = LWZ 0, killed renamable $r[[REGIADDR]] :: (dereferenceable load 4 from @i)
+; 32BIT-DAG:  renamable $r[[REGLLIADDR:[0-9]+]] = LWZtoc @lli, $r2 :: (load 4 from got)
+; 32BIT-DAG:  renamable $r[[REGLLI1:[0-9]+]] = LWZ 0, renamable $r[[REGLLIADDR]] :: (dereferenceable load 4 from @lli, align 8)
+; 32BIT-DAG:  renamable $r[[REGLLI2:[0-9]+]] = LWZ 4, killed renamable $r[[REGLLIADDR]] :: (dereferenceable load 4 from @lli + 4)
+; 32BIT-NEXT: ADJCALLSTACKDOWN 80, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-DAG:  STW killed renamable $r[[REGC]], 56, $r1 :: (store 4)
+; 32BIT-DAG:  STW killed renamable $r[[REGSI]], 60, $r1 :: (store 4)
+; 32BIT-DAG:  STW killed renamable $r[[REGI]], 64, $r1 :: (store 4)
+; 32BIT-DAG:  STW killed renamable $r[[REGLLI1]], 68, $r1 :: (store 4)
+; 32BIT-DAG:  STW killed renamable $r[[REGLLI2]], 72, $r1 :: (store 4)
+; 32BIT-DAG:  STW renamable $r[[REGI]], 76, $r1 :: (store 4)
+; 32BIT-DAG:  $r3 = LI 1
+; 32BIT-DAG:  $r4 = LI 2
+; 32BIT-DAG:  $r5 = LI 3
+; 32BIT-DAG:  $r6 = LI 4
+; 32BIT-DAG:  $r7 = LI 5
+; 32BIT-DAG:  $r8 = LI 6
+; 32BIT-DAG:  $r9 = LI 7
+; 32BIT-DAG:  $r10 = LI 8
+; 32BIT-NEXT: BL_NOP <mcsymbol .test_stackarg_int>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1
+; 32BIT-NEXT: ADJCALLSTACKUP 80, 0, implicit-def dead $r1, implicit $r1
+
+; Basic saving of floating point type arguments to the parameter save area.
+; The float and double arguments are passed both in FPRs and in the parameter save area.
+define void @call_test_stackarg_float() {
+entry:
+  %0 = load float, float* @f, align 4
+  %1 = load double, double* @d, align 8
+  call void @test_stackarg_float(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, float %0, double %1)
+  ret void
+}
+
+declare void @test_stackarg_float(i32, i32, i32, i32, i32, i32, i32, i32, float, double)
+
+; CHECK-LABEL:     name:            call_test_stackarg_float
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 32BIT-DAG:   renamable $r[[REGF:[0-9]+]] = LWZtoc @f, $r2 :: (load 4 from got)
+; 32BIT-DAG:   renamable $f1 = LFS 0, killed renamable $r[[REGF]] :: (dereferenceable load 4 from @f)
+; 32BIT-DAG:   renamable $r[[REGD:[0-9]+]] = LWZtoc @d, $r2 :: (load 4 from got)
+; 32BIT-DAG:   renamable $f2 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load 8 from @d)
+; 32BIT-NEXT:  ADJCALLSTACKDOWN 68, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-DAG:   STFS renamable $f1, 56, $r1 :: (store 4)
+; 32BIT-DAG:   STFD renamable $f2, 60, $r1 :: (store 8)
+; 32BIT-DAG:   $r3 = LI 1
+; 32BIT-DAG:   $r4 = LI 2
+; 32BIT-DAG:   $r5 = LI 3
+; 32BIT-DAG:   $r6 = LI 4
+; 32BIT-DAG:   $r7 = LI 5
+; 32BIT-DAG:   $r8 = LI 6
+; 32BIT-DAG:   $r9 = LI 7
+; 32BIT-DAG:   $r10 = LI 8
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_stackarg_float>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $f1, implicit $f2, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:  ADJCALLSTACKUP 68, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_stackarg_float:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM32PWR4:      stwu 1, -80(1)
+; ASM32PWR4-DAG:  lwz [[REGF:[0-9]+]], LC8(2)
+; ASM32PWR4-DAG:  lfs 1, 0([[REGF]])
+; ASM32PWR4-DAG:  lwz [[REGD:[0-9]+]], LC9(2)
+; ASM32PWR4-DAG:  lfd 2, 0([[REGD]])
+; ASM32PWR4-DAG:  stfs 1, 56(1)
+; ASM32PWR4-DAG:  stfd 2, 60(1)
+; ASM32PWR4-DAG:  li 3, 1
+; ASM32PWR4-DAG:  li 4, 2
+; ASM32PWR4-DAG:  li 5, 3
+; ASM32PWR4-DAG:  li 6, 4
+; ASM32PWR4-DAG:  li 7, 5
+; ASM32PWR4-DAG:  li 8, 6
+; ASM32PWR4-DAG:  li 9, 7
+; ASM32PWR4-DAG:  li 10, 8
+; ASM32PWR4-NEXT: bl .test_stackarg_float
+; ASM32PWR4-NEXT: nop
+; ASM32PWR4-NEXT: addi 1, 1, 80
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT-DAG:   renamable $x[[REGF:[0-9]+]] = LDtoc @f, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $f1 = LFS 0, killed renamable $x[[REGF]] :: (dereferenceable load 4 from @f)
+; 64BIT-DAG:   renamable $x[[REGD:[0-9]+]] = LDtoc @d, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $f2 = LFD 0, killed renamable $x[[REGD]] :: (dereferenceable load 8 from @d)
+; 64BIT-NEXT:  ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-DAG:   STFS renamable $f1, 112, $x1 :: (store 4)
+; 64BIT-DAG:   STFD renamable $f2, 120, $x1 :: (store 8)
+; 64BIT-DAG:   $x3 = LI8 1
+; 64BIT-DAG:   $x4 = LI8 2
+; 64BIT-DAG:   $x5 = LI8 3
+; 64BIT-DAG:   $x6 = LI8 4
+; 64BIT-DAG:   $x7 = LI8 5
+; 64BIT-DAG:   $x8 = LI8 6
+; 64BIT-DAG:   $x9 = LI8 7
+; 64BIT-DAG:   $x10 = LI8 8
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_stackarg_float>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $f1, implicit $f2, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64PWR4:      stdu 1, -128(1)
+; ASM64PWR4-DAG:  ld [[REGF:[0-9]+]], LC7(2)
+; ASM64PWR4-DAG:  lfs 1, 0([[REGF]])
+; ASM64PWR4-DAG:  ld [[REGD:[0-9]+]], LC8(2)
+; ASM64PWR4-DAG:  lfd 2, 0([[REGD]])
+; ASM64PWR4-DAG:  stfs 1, 112(1)
+; ASM64PWR4-DAG:  stfd 2, 120(1)
+; ASM64PWR4-DAG:  li 3, 1
+; ASM64PWR4-DAG:  li 4, 2
+; ASM64PWR4-DAG:  li 5, 3
+; ASM64PWR4-DAG:  li 6, 4
+; ASM64PWR4-DAG:  li 7, 5
+; ASM64PWR4-DAG:  li 8, 6
+; ASM64PWR4-DAG:  li 9, 7
+; ASM64PWR4-DAG:  li 10, 8
+; ASM64PWR4-NEXT: bl .test_stackarg_float
+; ASM64PWR4-NEXT: nop
+; ASM64PWR4-NEXT: addi 1, 1, 128
+
+define void @call_test_stackarg_float2() {
+entry:
+  %0 = load double, double* @d, align 8
+  call void (i32, i32, i32, i32, i32, i32, ...) @test_stackarg_float2(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, double %0)
+  ret void
+}
+
+declare void @test_stackarg_float2(i32, i32, i32, i32, i32, i32, ...)
+
+; CHECK-LABEL:     name: call_test_stackarg_float2{{.*}}
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 32BIT-DAG:   renamable $r[[REG:[0-9]+]] = LWZtoc @d, $r2 :: (load 4 from got)
+; 32BIT-DAG:   renamable $f1 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d)
+; 32BIT-DAG:   ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-DAG:   $r3 = LI 1
+; 32BIT-DAG:   $r4 = LI 2
+; 32BIT-DAG:   $r5 = LI 3
+; 32BIT-DAG:   $r6 = LI 4
+; 32BIT-DAG:   $r7 = LI 5
+; 32BIT-DAG:   $r8 = LI 6
+; 32BIT-DAG:   STFD renamable $f1, 0, %stack.0 :: (store 8 into %stack.0)
+; 32BIT-DAG:   renamable $r9 = LWZ 0, %stack.0 :: (load 4 from %stack.0, align 8)
+; 32BIT-DAG:   renamable $r10 = LWZ 4, %stack.0 :: (load 4 from %stack.0 + 4)
+; 32BIT-NEXT:   BL_NOP <mcsymbol .test_stackarg_float2>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit $f1, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:   ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_stackarg_float2:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM32PWR4:     stwu 1, -64(1)
+; ASM32PWR4-DAG: li 3, 1
+; ASM32PWR4-DAG: li 4, 2
+; ASM32PWR4-DAG: li 5, 3
+; ASM32PWR4-DAG: li 6, 4
+; ASM32PWR4-DAG: li 7, 5
+; ASM32PWR4-DAG: li 8, 6
+; ASM32PWR4-DAG: lwz [[REG:[0-9]+]], LC9(2)
+; ASM32PWR4-DAG: lfd 1, 0([[REG]])
+; ASM32PWR4-DAG: stfd 1, 56(1)
+; ASM32PWR4-DAG: lwz 9, 56(1)
+; ASM32PWR4-DAG: lwz 10, 60(1)
+; ASM32PWR4-NEXT: bl .test_stackarg_float2
+; ASM32PWR4-NEXT: nop
+; ASM32PWR4-NEXT: addi 1, 1, 64
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; 64BIT-DAG:   renamable $x[[REG:[0-9]+]] = LDtoc @d, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $f1 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load 8 from @d)
+; 64BIT-DAG:   ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-DAG:   $x3 = LI8 1
+; 64BIT-DAG:   $x4 = LI8 2
+; 64BIT-DAG:   $x5 = LI8 3
+; 64BIT-DAG:   $x6 = LI8 4
+; 64BIT-DAG:   $x7 = LI8 5
+; 64BIT-DAG:   $x8 = LI8 6
+; 64BIT-DAG:   STFD renamable $f1, 0, %stack.0 :: (store 8 into %stack.0)
+; 64BIT-DAG:   renamable $x9 = LD 0, %stack.0 :: (load 8 from %stack.0)
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_stackarg_float2>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit $f1, implicit $x9, implicit $x2, implicit-def $r1
+; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64PWR4:     stdu 1, -128(1)
+; ASM64PWR4-DAG: li 3, 1
+; ASM64PWR4-DAG: li 4, 2
+; ASM64PWR4-DAG: li 5, 3
+; ASM64PWR4-DAG: li 6, 4
+; ASM64PWR4-DAG: li 7, 5
+; ASM64PWR4-DAG: li 8, 6
+; ASM64PWR4-DAG: ld [[REG:[0-9]+]], LC8(2)
+; ASM64PWR4-DAG: lfd 1, 0([[REG]])
+; ASM64PWR4-DAG: stfd 1, 120(1)
+; ASM64PWR4-DAG: ld 9, 120(1)
+; ASM64PWR4-NEXT: bl .test_stackarg_float2
+; ASM64PWR4-NEXT: nop
+; ASM64PWR4-NEXT: addi 1, 1, 128
+
+; A double arg will pass on the stack in PPC32 if there is only one available GPR.
+define void @call_test_stackarg_float3() {
+entry:
+  %0 = load double, double* @d, align 8
+  %1 = load float, float* @f, align 4
+  call void (i32, i32, i32, i32, i32, i32, i32, ...) @test_stackarg_float3(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, double %0, float %1)
+  ret void
+}
+
+declare void @test_stackarg_float3(i32, i32, i32, i32, i32, i32, i32, ...)
+
+; CHECK-LABEL:     name: call_test_stackarg_float3{{.*}}
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; In 32-bit the double arg is written to memory because it cannot be fully stored in the last 32-bit GPR.
+; 32BIT-DAG:   renamable $r[[REGD:[0-9]+]] = LWZtoc @d, $r2 :: (load 4 from got)
+; 32BIT-DAG:   renamable $f1 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load 8 from @d)
+; 32BIT-DAG:   renamable $r[[REGF:[0-9]+]] = LWZtoc @f, $r2 :: (load 4 from got)
+; 32BIT-DAG:   renamable $f2 = LFS 0, killed renamable $r[[REGF]] :: (dereferenceable load 4 from @f)
+; 32BIT-DAG:   ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-DAG:   STFD renamable $f1, 52, $r1 :: (store 8)
+; 32BIT-DAG:   STFS renamable $f2, 60, $r1 :: (store 4)
+; 32BIT-DAG:   $r3 = LI 1
+; 32BIT-DAG:   $r4 = LI 2
+; 32BIT-DAG:   $r5 = LI 3
+; 32BIT-DAG:   $r6 = LI 4
+; 32BIT-DAG:   $r7 = LI 5
+; 32BIT-DAG:   $r8 = LI 6
+; 32BIT-DAG:   $r9 = LI 7
+; 32BIT-DAG:   STFD renamable $f1, 0, %stack.0 :: (store 8 into %stack.0)
+; 32BIT-DAG:   renamable $r10 = LWZ 0, %stack.0 :: (load 4 from %stack.0, align 8)
+; 32BIT-NEXT:  BL_NOP <mcsymbol .test_stackarg_float3>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit $f1, implicit $r10, implicit $f2, implicit $r2, implicit-def $r1
+; 32BIT-NEXT:  ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_stackarg_float3:
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM32PWR4:       stwu 1, -80(1)
+; ASM32PWR4-DAG:   lwz [[REGD:[0-9]+]], LC9(2)
+; ASM32PWR4-DAG:   lfd 1, 0([[REGD]])
+; ASM32PWR4-DAG:   lwz [[REGF:[0-9]+]], LC8(2)
+; ASM32PWR4-DAG:   lfs 2, 0([[REGF]])
+; ASM32PWR4-DAG:   stfd 1, 52(1)
+; ASM32PWR4-DAG:   stfs 2, 60(1)
+; ASM32PWR4-DAG:   li 3, 1
+; ASM32PWR4-DAG:   li 4, 2
+; ASM32PWR4-DAG:   li 5, 3
+; ASM32PWR4-DAG:   li 6, 4
+; ASM32PWR4-DAG:   li 7, 5
+; ASM32PWR4-DAG:   li 8, 6
+; ASM32PWR4-DAG:   li 9, 7
+; ASM32PWR4-DAG:   stfd 1, 72(1)
+; ASM32PWR4-DAG:   lwz 10, 72(1)
+; ASM32PWR4-NEXT:  bl .test_stackarg_float3
+; ASM32PWR4-NEXT:  nop
+; ASM32PWR4-NEXT:  addi 1, 1, 80
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; In 64-bit the double arg is not written to memory because it is fully stored in the last 64-bit GPR.
+; 64BIT-DAG:   renamable $x[[REGD:[0-9]+]] = LDtoc @d, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $f1 = LFD 0, killed renamable $x[[REGD]] :: (dereferenceable load 8 from @d)
+; 64BIT-DAG:   renamable $x[[REGF:[0-9]+]] = LDtoc @f, $x2 :: (load 8 from got)
+; 64BIT-DAG:   renamable $f2 = LFS 0, killed renamable $x[[REGF]] :: (dereferenceable load 4 from @f)
+; 64BIT-DAG:   ADJCALLSTACKDOWN 120, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-DAG:   STFS renamable $f2, 112, $x1 :: (store 4)
+; 64BIT-DAG:   $x3 = LI8 1
+; 64BIT-DAG:   $x4 = LI8 2
+; 64BIT-DAG:   $x5 = LI8 3
+; 64BIT-DAG:   $x6 = LI8 4
+; 64BIT-DAG:   $x7 = LI8 5
+; 64BIT-DAG:   $x8 = LI8 6
+; 64BIT-DAG:   $x9 = LI8 7
+; 64BIT-DAG:   STFD renamable $f1, 0, %stack.0 :: (store 8 into %stack.0)
+; 64BIT-DAG:   renamable $x10 = LD 0, %stack.0 :: (load 8 from %stack.0)
+; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_stackarg_float3>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit $f1, implicit $x10, implicit $f2, implicit $x2, implicit-def $r1
+
+; 64BIT-NEXT: ADJCALLSTACKUP 120, 0, implicit-def dead $r1, implicit $r1
+
+; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
+; ASM64PWR4:       stdu 1, -128(1)
+; ASM64PWR4-DAG:   ld [[REGD:[0-9]+]], LC8(2)
+; ASM64PWR4-DAG:   lfd 1, 0([[REGD]])
+; ASM64PWR4-DAG:   ld [[REGF:[0-9]+]], LC7(2)
+; ASM64PWR4-DAG:   lfs 2, 0([[REGF]])
+; ASM64PWR4-DAG:   stfs 2, 112(1)
+; ASM64PWR4-DAG:   li 3, 1
+; ASM64PWR4-DAG:   li 4, 2
+; ASM64PWR4-DAG:   li 5, 3
+; ASM64PWR4-DAG:   li 6, 4
+; ASM64PWR4-DAG:   li 7, 5
+; ASM64PWR4-DAG:   li 8, 6
+; ASM64PWR4-DAG:   li 9, 7
+; ASM64PWR4-DAG:   stfd 1, 120(1)
+; ASM64PWR4-DAG:   ld 10, 120(1)
+; ASM64PWR4-NEXT:  bl .test_stackarg_float3
+; ASM64PWR4-NEXT:  nop
+; ASM64PWR4-NEXT:  addi 1, 1, 128
diff --git a/llvm/test/CodeGen/PowerPC/aix-stackargs.ll b/llvm/test/CodeGen/PowerPC/aix-stackargs.ll

deleted file mode 100644 (file)

index 6cac691..0000000
--- a/llvm/test/CodeGen/PowerPC/aix-stackargs.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: not llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
-; RUN: not llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s
-
-define void @bar() {
-entry:
-  call void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
-  ret void
-}
-
-declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, i32)
-
-; CHECK: LLVM ERROR: Handling of placing parameters on the stack is unimplemented!
author	Chris Bowler <cebowleratibm@gmail.com>
	Thu, 6 Feb 2020 16:19:35 +0000 (11:19 -0500)
committer	Sean Fertile <sd.fertile@gmail.com>
	Thu, 6 Feb 2020 17:07:34 +0000 (12:07 -0500)
llvm/lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/PowerPC/aix-cc-abi.ll		patch \| blob \| history
llvm/test/CodeGen/PowerPC/aix-stackargs.ll	[deleted file]	patch \| blob \| history