[X86] Codegen for preallocated

author Arthur Eubanks <aeubanks@google.com>

Mon, 16 Mar 2020 19:32:36 +0000 (12:32 -0700)

committer Arthur Eubanks <aeubanks@google.com>

Wed, 20 May 2020 16:20:38 +0000 (09:20 -0700)
author Arthur Eubanks <aeubanks@google.com>
Mon, 16 Mar 2020 19:32:36 +0000 (12:32 -0700)
committer Arthur Eubanks <aeubanks@google.com>
Wed, 20 May 2020 16:20:38 +0000 (09:20 -0700)
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h

index a0e0006..839e82d 100644 (file)
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -906,6 +906,13 @@ enum NodeType {
    VAEND,
    VASTART,
  
+  /// PREALLOCATED_SETUP - This has 2 operands: an input chain and a SRCVALUE
+  /// with the preallocated call Value.
+  PREALLOCATED_SETUP,
+  /// PREALLOCATED_ARG - This has 3 operands: an input chain, a SRCVALUE
+  /// with the preallocated call Value, and a constant int argument index.
+  PREALLOCATED_ARG,
+
    /// SRCVALUE - This is a node type that holds a Value* that is used to
    /// make reference to a value in the LLVM IR.
    SRCVALUE,
diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h

index 07f7f81..6ccc1ce 100644 (file)
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -35,6 +35,7 @@ namespace ISD {
      unsigned IsReturned : 1; ///< Always returned
      unsigned IsSplit : 1;
      unsigned IsInAlloca : 1;   ///< Passed with inalloca
+    unsigned IsPreallocated : 1; ///< ByVal without the copy
      unsigned IsSplitEnd : 1;   ///< Last part of a split
      unsigned IsSwiftSelf : 1;  ///< Swift self parameter
      unsigned IsSwiftError : 1; ///< Swift error parameter
@@ -56,9 +57,9 @@ namespace ISD {
    public:
      ArgFlagsTy()
          : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0),
-          IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0),
-          IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0), IsHva(0),
-          IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
+          IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0),
+          IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0),
+          IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
            IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
            IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0),
            PointerAddrSpace(0) {
@@ -83,6 +84,9 @@ namespace ISD {
      bool isInAlloca() const { return IsInAlloca; }
      void setInAlloca() { IsInAlloca = 1; }
  
+    bool isPreallocated() const { return IsPreallocated; }
+    void setPreallocated() { IsPreallocated = 1; }
+
      bool isSwiftSelf() const { return IsSwiftSelf; }
      void setSwiftSelf() { IsSwiftSelf = 1; }
  
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h

index 2cab257..db501c8 100644 (file)
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -273,17 +273,20 @@ public:
      bool IsNest : 1;
      bool IsByVal : 1;
      bool IsInAlloca : 1;
+    bool IsPreallocated : 1;
      bool IsReturned : 1;
      bool IsSwiftSelf : 1;
      bool IsSwiftError : 1;
      bool IsCFGuardTarget : 1;
      MaybeAlign Alignment = None;
      Type *ByValType = nullptr;
+    Type *PreallocatedType = nullptr;
  
      ArgListEntry()
          : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
-          IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
-          IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {}
+          IsNest(false), IsByVal(false), IsInAlloca(false),
+          IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
+          IsSwiftError(false), IsCFGuardTarget(false) {}
  
      void setAttributes(const CallBase *Call, unsigned ArgIdx);
    };
@@ -3608,6 +3611,7 @@ public:
      bool IsReturnValueUsed : 1;
      bool IsConvergent      : 1;
      bool IsPatchPoint      : 1;
+    bool IsPreallocated : 1;
  
      // IsTailCall should be modified by implementations of
      // TargetLowering::LowerCall that perform tail call conversions.
@@ -3631,7 +3635,7 @@ public:
      CallLoweringInfo(SelectionDAG &DAG)
          : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
            DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
-          IsPatchPoint(false), DAG(DAG) {}
+          IsPatchPoint(false), IsPreallocated(false), DAG(DAG) {}
  
      CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
        DL = dl;
@@ -3737,6 +3741,11 @@ public:
        return *this;
      }
  
+    CallLoweringInfo &setIsPreallocated(bool Value = true) {
+      IsPreallocated = Value;
+      return *this; // returns the builder so setters can be chained
+    }
+
      CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
        IsPostTypeLegalization = Value;
        return *this;
diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h

index e23e9d0..2bd8e99 100644 (file)
--- a/llvm/include/llvm/IR/Argument.h
+++ b/llvm/include/llvm/IR/Argument.h
@@ -110,6 +110,9 @@ public:
    /// Return true if this argument has the inalloca attribute.
    bool hasInAllocaAttr() const;
  
+  /// Return true if this argument has the preallocated attribute.
+  bool hasPreallocatedAttr() const;
+
    /// Return true if this argument has the zext attribute.
    bool hasZExtAttr() const;
  
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h

index 05c0768..a76e291 100644 (file)
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -623,6 +623,9 @@ public:
    /// Return the byval type for the specified function parameter.
    Type *getParamByValType(unsigned ArgNo) const;
  
+  /// Return the preallocated type for the specified function parameter.
+  Type *getParamPreallocatedType(unsigned ArgNo) const;
+
    /// Get the stack alignment.
    MaybeAlign getStackAlignment(unsigned Index) const;
  
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h

index fd34bf3..8ad39ad 100644 (file)
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1604,6 +1604,12 @@ public:
      return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
    }
  
+  /// Preallocated type of argument ArgNo: attribute type, else pointee type.
+  Type *getParamPreallocatedType(unsigned ArgNo) const {
+    Type *Ty = Attrs.getParamPreallocatedType(ArgNo);
+    return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
+  }
+
    /// Extract the number of dereferenceable bytes for a call or
    /// parameter (0=unknown).
    uint64_t getDereferenceableBytes(unsigned i) const {
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def

index 5ab88f3..8385af9 100644 (file)
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -127,6 +127,12 @@ HANDLE_TARGET_OPCODE(PATCHPOINT)
  /// additionally expand this pseudo after register allocation.
  HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD)
  
+/// These are used to support call sites that must have the stack adjusted
+/// before the call (e.g. to initialize an argument passed by value).
+/// See llvm.call.preallocated.{setup,arg} in the LangRef for more details.
+HANDLE_TARGET_OPCODE(PREALLOCATED_SETUP)
+HANDLE_TARGET_OPCODE(PREALLOCATED_ARG)
+
  /// Call instruction with associated vm state for deoptimization and list
  /// of live pointers for relocation by the garbage collector.  It is
  /// intended to support garbage collection with fully precise relocating
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td

index 70b17bc..c628fa8 100644 (file)
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1173,6 +1173,18 @@ def LOAD_STACK_GUARD : StandardPseudoInstruction {
    let hasSideEffects = 0;
    bit isPseudo = 1;
  }
+def PREALLOCATED_SETUP : StandardPseudoInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$a); // $a: preallocated call-site id
+  let usesCustomInserter = 1;
+  let hasSideEffects = 1;
+}
+def PREALLOCATED_ARG : StandardPseudoInstruction {
+  let OutOperandList = (outs ptr_rc:$loc); // $loc: argument slot address
+  let InOperandList = (ins i32imm:$a, i32imm:$b); // $a: call id, $b: arg index
+  let usesCustomInserter = 1;
+  let hasSideEffects = 1;
+}
  def LOCAL_ESCAPE : StandardPseudoInstruction {
    // This instruction is really just a label. It has to be part of the chain so
    // that it doesn't get dropped from the DAG, but it produces nothing and has
diff --git a/llvm/include/llvm/Target/TargetCallingConv.td b/llvm/include/llvm/Target/TargetCallingConv.td

index d5f3931..057f330 100644 (file)
--- a/llvm/include/llvm/Target/TargetCallingConv.td
+++ b/llvm/include/llvm/Target/TargetCallingConv.td
@@ -41,6 +41,11 @@ class CCIf<string predicate, CCAction A> : CCPredicateAction<A> {
  class CCIfByVal<CCAction A> : CCIf<"ArgFlags.isByVal()", A> {
  }
  
+/// CCIfPreallocated - If the current argument has the preallocated parameter
+/// attribute, apply Action A.
+class CCIfPreallocated<CCAction A> : CCIf<"ArgFlags.isPreallocated()", A> {
+}
+
  /// CCIfSwiftSelf - If the current argument has swiftself parameter attribute,
  /// apply Action A.
  class CCIfSwiftSelf<CCAction A> : CCIf<"ArgFlags.isSwiftSelf()", A> {
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp

index 86fbb4b..1e85d0a 100644 (file)
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -96,10 +96,12 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
      Flags.setSwiftError();
    if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
      Flags.setByVal();
+  if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated))
+    Flags.setPreallocated();
    if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
      Flags.setInAlloca();
  
-  if (Flags.isByVal() || Flags.isInAlloca()) {
+  if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
      Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
  
      auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp

index d117c60..eb5b7ef 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1214,7 +1214,16 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
        // the various CC lowering callbacks.
        Flags.setByVal();
      }
-    if (Arg.IsByVal || Arg.IsInAlloca) {
+    if (Arg.IsPreallocated) {
+      Flags.setPreallocated();
+      // Set the byval flag for CCAssignFn callbacks that don't know about
+      // preallocated. This way we can know how many bytes we should've
+      // allocated and how many bytes a callee cleanup function will pop.  If we
+      // port preallocated to more targets, we'll have to add custom
+      // preallocated handling in the various CC lowering callbacks.
+      Flags.setByVal();
+    }
+    if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) {
        PointerType *Ty = cast<PointerType>(Arg.Ty);
        Type *ElementTy = Ty->getElementType();
        unsigned FrameSize =
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index 2000f51..2dcab73 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1873,9 +1873,6 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
  }
  
  SDValue SelectionDAG::getSrcValue(const Value *V) {
-  assert((!V || V->getType()->isPointerTy()) &&
-         "SrcValue is not a pointer?");
-
    FoldingSetNodeID ID;
    AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
    ID.AddPointer(V);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index 54157cc..ee8a2f9 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5606,6 +5606,23 @@ void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
    LowerCallTo(I, Callee, I.isTailCall());
  }
  
+/// Given a @llvm.call.preallocated.setup, return the preallocated call that
+/// consumes it: its first user that is not a @llvm.call.preallocated.arg.
+static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
+  assert(cast<CallBase>(PreallocatedSetup)
+                 ->getCalledFunction()
+                 ->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
+         "expected call_preallocated_setup Value");
+  for (auto *U : PreallocatedSetup->users()) {
+    auto *UseCall = cast<CallBase>(U);
+    const Function *Fn = UseCall->getCalledFunction(); // may be null
+    if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
+      return UseCall;
+    }
+  }
+  llvm_unreachable("expected corresponding call to preallocated setup/arg");
+}
+
  /// Lower the call to the specified intrinsic function.
  void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                                               unsigned Intrinsic) {
@@ -5798,6 +5815,30 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
      updateDAGForMaybeTailCall(MC);
      return;
    }
+  case Intrinsic::call_preallocated_setup: {
+    const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
+    SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
+    SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
+                              getRoot(), SrcValue);
+    setValue(&I, Res);
+    DAG.setRoot(Res);
+    return;
+  }
+  case Intrinsic::call_preallocated_arg: {
+    const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
+    SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
+    SDValue Ops[3];
+    Ops[0] = getRoot();
+    Ops[1] = SrcValue;
+    Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
+                                   MVT::i32); // arg index
+    SDValue Res = DAG.getNode(
+        ISD::PREALLOCATED_ARG, sdl,
+        DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
+    setValue(&I, Res);
+    DAG.setRoot(Res.getValue(1));
+    return;
+  }
    case Intrinsic::dbg_addr:
    case Intrinsic::dbg_declare: {
      const auto &DI = cast<DbgVariableIntrinsic>(I);
@@ -7116,7 +7157,9 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
        .setChain(getRoot())
        .setCallee(RetTy, FTy, Callee, std::move(Args), CB)
        .setTailCall(isTailCall)
-      .setConvergent(CB.isConvergent());
+      .setConvergent(CB.isConvergent())
+      .setIsPreallocated(
+          CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
    std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
  
    if (Result.first.getNode()) {
@@ -7642,9 +7685,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
    // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
    // have to do anything here to lower funclet bundles.
    // CFGuardTarget bundles are lowered in LowerCallTo.
-  assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
-                                        LLVMContext::OB_funclet,
-                                        LLVMContext::OB_cfguardtarget}) &&
+  assert(!I.hasOperandBundlesOtherThan(
+             {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
+              LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) &&
           "Cannot lower calls with arbitrary operand bundles!");
  
    SDValue Callee = getValue(I.getCalledOperand());
@@ -8605,7 +8648,9 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
        .setChain(getRoot())
        .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
        .setDiscardResult(Call->use_empty())
-      .setIsPatchPoint(IsPatchPoint);
+      .setIsPatchPoint(IsPatchPoint)
+      .setIsPreallocated(
+          Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
  }
  
  /// Add a stack map intrinsic call's live variable operands to a stackmap
@@ -9125,6 +9170,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
          Flags.setCFGuardTarget();
        if (Args[i].IsByVal)
          Flags.setByVal();
+      if (Args[i].IsPreallocated) {
+        Flags.setPreallocated();
+        // Set the byval flag for CCAssignFn callbacks that don't know about
+        // preallocated.  This way we can know how many bytes we should've
+        // allocated and how many bytes a callee cleanup function will pop.  If
+        // we port preallocated to more targets, we'll have to add custom
+        // preallocated handling in the various CC lowering callbacks.
+        Flags.setByVal();
+      }
        if (Args[i].IsInAlloca) {
          Flags.setInAlloca();
          // Set the byval flag for CCAssignFn callbacks that don't know about
@@ -9134,7 +9188,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
          // in the various CC lowering callbacks.
          Flags.setByVal();
        }
-      if (Args[i].IsByVal || Args[i].IsInAlloca) {
+      if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
          PointerType *Ty = cast<PointerType>(Args[i].Ty);
          Type *ElementTy = Ty->getElementType();
  
@@ -9448,7 +9502,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
      // initializes the alloca. Don't elide copies from the same argument twice.
      const Value *Val = SI->getValueOperand()->stripPointerCasts();
      const auto *Arg = dyn_cast<Argument>(Val);
-    if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
+    if (!Arg || Arg->hasPassPointeeByValueAttr() ||
          Arg->getType()->isEmptyTy() ||
          DL.getTypeStoreSize(Arg->getType()) !=
              DL.getTypeAllocSize(AI->getAllocatedType()) ||
@@ -9634,12 +9688,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
          // in the various CC lowering callbacks.
          Flags.setByVal();
        }
+      if (Arg.hasAttribute(Attribute::Preallocated)) {
+        Flags.setPreallocated();
+        // Set the byval flag for CCAssignFn callbacks that don't know about
+        // preallocated.  This way we can know how many bytes we should've
+        // allocated and how many bytes a callee cleanup function will pop.  If
+        // we port preallocated to more targets, we'll have to add custom
+        // preallocated handling in the various CC lowering callbacks.
+        Flags.setByVal();
+      }
        if (F.getCallingConv() == CallingConv::X86_INTR) {
          // IA Interrupt passes frame (1st parameter) by value in the stack.
          if (ArgNo == 0)
            Flags.setByVal();
        }
-      if (Flags.isByVal() || Flags.isInAlloca()) {
+      if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
          Type *ElementTy = Arg.getParamByValType();
  
          // For ByVal, size and alignment should be passed from FE.  BE will
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

index f81d18c..816b1dc 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -393,6 +393,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
    case ISD::GC_TRANSITION_END:          return "gc_transition.end";
    case ISD::GET_DYNAMIC_AREA_OFFSET:    return "get.dynamic.area.offset";
    case ISD::FREEZE:                     return "freeze";
+  case ISD::PREALLOCATED_SETUP:
+    return "call_setup";
+  case ISD::PREALLOCATED_ARG:
+    return "call_alloc";
  
    // Bit manipulation
    case ISD::ABS:                        return "abs";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index a889078..0c7df1c 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -110,14 +110,18 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
    IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
    IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
    IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
+  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
    IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
    IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
    IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
    IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
    Alignment = Call->getParamAlign(ArgIdx);
    ByValType = nullptr;
-  if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
+  if (IsByVal)
      ByValType = Call->getParamByValType(ArgIdx);
+  PreallocatedType = nullptr;
+  if (IsPreallocated)
+    PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
  }
  
  /// Generate a libcall taking the given operands as arguments and returning a
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp

index bf39740..122cfe5 100644 (file)
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -1437,6 +1437,10 @@ Type *AttributeList::getParamByValType(unsigned Index) const {
    return getAttributes(Index+FirstArgIndex).getByValType();
  }
  
+Type *AttributeList::getParamPreallocatedType(unsigned Index) const {
+  return getAttributes(Index + FirstArgIndex).getPreallocatedType();
+}
+
  MaybeAlign AttributeList::getStackAlignment(unsigned Index) const {
    return getAttributes(Index).getStackAlignment();
  }
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp

index d8663be..cb44429 100644 (file)
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -114,6 +114,12 @@ bool Argument::hasInAllocaAttr() const {
    return hasAttribute(Attribute::InAlloca);
  }
  
+bool Argument::hasPreallocatedAttr() const {
+  if (!getType()->isPointerTy()) // non-pointer arguments never qualify
+    return false;
+  return hasAttribute(Attribute::Preallocated);
+}
+
  bool Argument::hasPassPointeeByValueAttr() const {
    if (!getType()->isPointerTy()) return false;
    AttributeList Attrs = getParent()->getAttributes();
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td

index 3ec947d..802e694 100644 (file)
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -789,8 +789,9 @@ def CC_X86_32_Vector_Darwin : CallingConv<[
  /// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
  /// values are spilled on the stack.
  def CC_X86_32_Common : CallingConv<[
-  // Handles byval parameters.
+  // Handles byval/preallocated parameters.
    CCIfByVal<CCPassByVal<4, 4>>,
+  CCIfPreallocated<CCPassByVal<4, 4>>,
  
    // The first 3 float or double arguments, if marked 'inreg' and if the call
    // is not a vararg call and if SSE2 is available, are passed in SSE registers.
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp

index 5bc4edc..5a51e24 100644 (file)
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3245,7 +3245,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
      return false;
  
    for (auto Flag : CLI.OutFlags)
-    if (Flag.isSwiftError())
+    if (Flag.isSwiftError() || Flag.isPreallocated())
        return false;
  
    SmallVector<MVT, 16> OutVTs;
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp

index b538d0b..e00a260 100644 (file)
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -57,7 +57,8 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
  
  bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
    return !MF.getFrameInfo().hasVarSizedObjects() &&
-         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
+         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
+         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
  }
  
  /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
@@ -67,6 +68,7 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  bool
  X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
    return hasReservedCallFrame(MF) ||
+         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
           (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
           TRI->hasBasePointer(MF);
  }
@@ -90,10 +92,10 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
  bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
    const MachineFrameInfo &MFI = MF.getFrameInfo();
    return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
-          TRI->needsStackRealignment(MF) ||
-          MFI.hasVarSizedObjects() ||
+          TRI->needsStackRealignment(MF) || MFI.hasVarSizedObjects() ||
            MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
            MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
            MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
            MFI.hasStackMap() || MFI.hasPatchPoint() ||
            MFI.hasCopyImplyingStackAdjustment());
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

index ffe9d43..5586efb 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5552,6 +5552,39 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
      CurDAG->RemoveDeadNode(Node);
      return;
    }
+  case ISD::PREALLOCATED_SETUP: {
+    auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+    auto CallId = MFI->getPreallocatedIdForCallSite(
+        cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
+    SDValue Chain = Node->getOperand(0);
+    SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
+    MachineSDNode *New = CurDAG->getMachineNode(
+        TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain);
+    ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
+    CurDAG->RemoveDeadNode(Node);
+    return;
+  }
+  case ISD::PREALLOCATED_ARG: {
+    auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+    auto CallId = MFI->getPreallocatedIdForCallSite(
+        cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
+    SDValue Chain = Node->getOperand(0);
+    SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
+    SDValue ArgIndex = Node->getOperand(2);
+    SDValue Ops[3];
+    Ops[0] = CallIdValue;
+    Ops[1] = ArgIndex;
+    Ops[2] = Chain;
+    MachineSDNode *New = CurDAG->getMachineNode(
+        TargetOpcode::PREALLOCATED_ARG, dl,
+        CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
+                          MVT::Other),
+        Ops);
+    ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
+    ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
+    CurDAG->RemoveDeadNode(Node);
+    return;
+  }
    }
  
    SelectCode(Node);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index f4a88de..8380a0d 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3945,6 +3945,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
      if (ArgLocs.back().getLocMemOffset() != 0)
        report_fatal_error("any parameter with the inalloca attribute must be "
                           "the only memory argument");
+  } else if (CLI.IsPreallocated) {
+    assert(ArgLocs.back().isMemLoc() &&
+           "cannot use preallocated attribute on a register "
+           "parameter");
+    SmallVector<size_t, 4> PreallocatedOffsets;
+    for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
+      if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
+        PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
+      }
+    }
+    auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
+    size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
+    MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
+    MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
+    NumBytesToPush = 0;
    }
  
    if (!IsSibcall && !IsMustTail)
@@ -3972,9 +3987,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
    for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
         ++I, ++OutIndex) {
      assert(OutIndex < Outs.size() && "Invalid Out index");
-    // Skip inalloca arguments, they have already been written.
+    // Skip inalloca/preallocated arguments, they have already been written.
      ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
-    if (Flags.isInAlloca())
+    if (Flags.isInAlloca() || Flags.isPreallocated())
        continue;
  
      CCValAssign &VA = ArgLocs[I];
@@ -4161,8 +4176,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
        assert(VA.isMemLoc());
        SDValue Arg = OutVals[OutsIndex];
        ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
-      // Skip inalloca arguments.  They don't require any work.
-      if (Flags.isInAlloca())
+      // Skip inalloca/preallocated arguments.  They don't require any work.
+      if (Flags.isInAlloca() || Flags.isPreallocated())
          continue;
        // Create frame index.
        int32_t Offset = VA.getLocMemOffset()+FPDiff;
@@ -33076,6 +33091,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
        BB->addLiveIn(BasePtr);
      return BB;
    }
+  case TargetOpcode::PREALLOCATED_SETUP: {
+    assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
+    auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+    MFI->setHasPreallocatedCall(true);
+    int64_t PreallocatedId = MI.getOperand(0).getImm();
+    size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
+    assert(StackAdjustment != 0 && "0 stack adjustment");
+    LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
+                      << StackAdjustment << "\n");
+    BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
+        .addReg(X86::ESP)
+        .addImm(StackAdjustment);
+    MI.eraseFromParent();
+    return BB;
+  }
+  case TargetOpcode::PREALLOCATED_ARG: {
+    assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
+    int64_t PreallocatedId = MI.getOperand(1).getImm();
+    int64_t ArgIdx = MI.getOperand(2).getImm();
+    auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+    size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
+    LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
+                      << ", arg offset " << ArgOffset << "\n");
+    // stack pointer + offset
+    addRegOffset(
+        BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
+        X86::ESP, false, ArgOffset);
+    MI.eraseFromParent();
+    return BB;
+  }
    }
  }
  
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h

index 70deef9..eedad95 100644 (file)
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -13,6 +13,8 @@
  #ifndef LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
  #define LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
  
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/MachineFunction.h"
  
@@ -103,6 +105,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
    /// True if this function has WIN_ALLOCA instructions.
    bool HasWinAlloca = false;
  
+  /// True if this function has any preallocated calls.
+  bool HasPreallocatedCall = false;
+
+  ValueMap<const Value *, size_t> PreallocatedIds;
+  SmallVector<size_t, 0> PreallocatedStackSizes;
+  SmallVector<SmallVector<size_t, 4>, 0> PreallocatedArgOffsets;
+
  private:
    /// ForwardedMustTailRegParms - A list of virtual and physical registers
    /// that must be forwarded to every musttail call.
@@ -184,6 +193,50 @@ public:
  
    bool hasWinAlloca() const { return HasWinAlloca; }
    void setHasWinAlloca(bool v) { HasWinAlloca = v; }
+
+  /// Accessors for the flag that records whether this function has any
+  /// preallocated calls.
+  bool hasPreallocatedCall() const { return HasPreallocatedCall; }
+  void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
+
+  /// Returns the id assigned to the preallocated call site \p CS. The first
+  /// query for a call site hands out the next sequential id and appends its
+  /// (still unset) stack size and argument offset entries; later queries for
+  /// the same call site return the same id.
+  size_t getPreallocatedIdForCallSite(const Value *CS) {
+    auto Insert = PreallocatedIds.insert({CS, PreallocatedIds.size()});
+    if (Insert.second) {
+      // Newly seen call site: grow both per-id tables in step with the map so
+      // that the new id indexes a valid slot in each of them.
+      PreallocatedStackSizes.push_back(0);
+      PreallocatedArgOffsets.emplace_back();
+    }
+    return Insert.first->second;
+  }
+
+  /// Records \p StackSize as the stack size of the call with id \p Id.
+  void setPreallocatedStackSize(size_t Id, size_t StackSize) {
+    PreallocatedStackSizes[Id] = StackSize;
+  }
+
+  /// Returns the stack size recorded for \p Id. Zero doubles as the "not set"
+  /// marker, so this asserts that a non-zero size has been recorded.
+  size_t getPreallocatedStackSize(const size_t Id) const {
+    assert(PreallocatedStackSizes[Id] != 0 && "stack size not set");
+    return PreallocatedStackSizes[Id];
+  }
+
+  /// Stores a copy of \p AO as the argument offsets of the call with id \p Id.
+  void setPreallocatedArgOffsets(size_t Id, ArrayRef<size_t> AO) {
+    PreallocatedArgOffsets[Id].assign(AO.begin(), AO.end());
+  }
+
+  /// Returns a non-owning view of the argument offsets recorded for \p Id;
+  /// asserts that a non-empty list has been recorded.
+  ArrayRef<size_t> getPreallocatedArgOffsets(const size_t Id) const {
+    assert(!PreallocatedArgOffsets[Id].empty() && "arg offsets not set");
+    return PreallocatedArgOffsets[Id];
+  }
  };
  
  } // End llvm namespace
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp

index 7834632..f456728 100644 (file)
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -627,18 +627,22 @@ static bool CantUseSP(const MachineFrameInfo &MFI) {
  }
  
  bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
-   const MachineFrameInfo &MFI = MF.getFrameInfo();
-
-   if (!EnableBasePointer)
-     return false;
-
-   // When we need stack realignment, we can't address the stack from the frame
-   // pointer.  When we have dynamic allocas or stack-adjusting inline asm, we
-   // can't address variables from the stack pointer.  MS inline asm can
-   // reference locals while also adjusting the stack pointer.  When we can't
-   // use both the SP and the FP, we need a separate base pointer register.
-   bool CantUseFP = needsStackRealignment(MF);
-   return CantUseFP && CantUseSP(MFI);
+  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  if (X86FI->hasPreallocatedCall())
+    return true;
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  if (!EnableBasePointer)
+    return false;
+
+  // When we need stack realignment, we can't address the stack from the frame
+  // pointer.  When we have dynamic allocas or stack-adjusting inline asm, we
+  // can't address variables from the stack pointer.  MS inline asm can
+  // reference locals while also adjusting the stack pointer.  When we can't
+  // use both the SP and the FP, we need a separate base pointer register.
+  bool CantUseFP = needsStackRealignment(MF);
+  return CantUseFP && CantUseSP(MFI);
  }
  
  bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp

index 520aa9d..4fd1276 100644 (file)
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1015,9 +1015,9 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
  
    // CI should not has any ABI-impacting function attributes.
    static const Attribute::AttrKind ABIAttrs[] = {
-      Attribute::StructRet, Attribute::ByVal,    Attribute::InAlloca,
-      Attribute::InReg,     Attribute::Returned, Attribute::SwiftSelf,
-      Attribute::SwiftError};
+      Attribute::StructRet,    Attribute::ByVal,     Attribute::InAlloca,
+      Attribute::Preallocated, Attribute::InReg,     Attribute::Returned,
+      Attribute::SwiftSelf,    Attribute::SwiftError};
    AttributeList Attrs = CI.getAttributes();
    for (auto AK : ABIAttrs)
      if (Attrs.hasParamAttribute(0, AK))
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp

index a4f0965..df6cbba 100644 (file)
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1363,7 +1363,8 @@ bool Attributor::isValidFunctionSignatureRewrite(
    AttributeList FnAttributeList = Fn->getAttributes();
    if (FnAttributeList.hasAttrSomewhere(Attribute::Nest) ||
        FnAttributeList.hasAttrSomewhere(Attribute::StructRet) ||
-      FnAttributeList.hasAttrSomewhere(Attribute::InAlloca)) {
+      FnAttributeList.hasAttrSomewhere(Attribute::InAlloca) ||
+      FnAttributeList.hasAttrSomewhere(Attribute::Preallocated)) {
      LLVM_DEBUG(
          dbgs() << "[Attributor] Cannot rewrite due to complex attribute\n");
      return false;
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp

index 3af6d08..0117a73 100644 (file)
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -4455,7 +4455,8 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
      AAValueSimplifyImpl::initialize(A);
      if (!getAnchorScope() || getAnchorScope()->isDeclaration())
        indicatePessimisticFixpoint();
-    if (hasAttr({Attribute::InAlloca, Attribute::StructRet, Attribute::Nest},
+    if (hasAttr({Attribute::InAlloca, Attribute::Preallocated,
+                 Attribute::StructRet, Attribute::Nest},
                  /* IgnoreSubsumingPositions */ true))
        indicatePessimisticFixpoint();
  
@@ -5695,7 +5696,7 @@ struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
  
      // TODO: From readattrs.ll: "inalloca parameters are always
      //                           considered written"
-    if (hasAttr({Attribute::InAlloca})) {
+    if (hasAttr({Attribute::InAlloca, Attribute::Preallocated})) {
        removeKnownBits(NO_WRITES);
        removeAssumedBits(NO_WRITES);
      }
diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp

index 55039f4..5dc1a6f 100644 (file)
--- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -483,9 +483,10 @@ DeadArgumentEliminationPass::SurveyUses(const Value *V,
  // We consider arguments of non-internal functions to be intrinsically alive as
  // well as arguments to functions which have their "address taken".
  void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
-  // Functions with inalloca parameters are expecting args in a particular
-  // register and memory layout.
-  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) {
+  // Functions with inalloca/preallocated parameters are expecting args in a
+  // particular register and memory layout.
+  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+      F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
      MarkLive(F);
      return;
    }
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp

index 0701dbc..4baeaa6 100644 (file)
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -447,7 +447,7 @@ determinePointerReadAttrs(Argument *A,
    SmallPtrSet<Use *, 32> Visited;
  
    // inalloca arguments are always clobbered by the call.
-  if (A->hasInAllocaAttr())
+  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
      return Attribute::None;
  
    bool IsRead = false;
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp

index c5837ea..0257be1 100644 (file)
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2333,6 +2333,7 @@ OptimizeFunctions(Module &M,
      // wouldn't be safe in the presence of inalloca.
      // FIXME: We should also hoist alloca affected by this to the entry
      // block if possible.
+    // FIXME: handle preallocated
      if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
          !F->hasAddressTaken()) {
        RemoveAttribute(F, Attribute::InAlloca);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

index d554011..64ba81a 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -4737,6 +4737,7 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
    //
    //  Similarly, avoid folding away bitcasts of byval calls.
    if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+      Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated) ||
        Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
      return false;
  
diff --git a/llvm/test/CodeGen/X86/arg-copy-elide.ll b/llvm/test/CodeGen/X86/arg-copy-elide.ll

index 705a356..14db84f 100644 (file)
--- a/llvm/test/CodeGen/X86/arg-copy-elide.ll
+++ b/llvm/test/CodeGen/X86/arg-copy-elide.ll
@@ -246,6 +246,20 @@ entry:
  ; CHECK: calll _addrof_i32
  ; CHECK: retl
  
+define void @avoid_preallocated(i32* preallocated(i32) %x) {
+entry:
+  %x.p.p = alloca i32*
+  store i32* %x, i32** %x.p.p
+  call void @addrof_i32(i32* %x)
+  ret void
+}
+
+; CHECK-LABEL: _avoid_preallocated:
+; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i32
+; CHECK: retl
+
  ; Don't elide the copy when the alloca is escaped with a store.
  define void @escape_with_store(i32 %x) {
    %x1 = alloca i32
diff --git a/llvm/test/CodeGen/X86/musttail-indirect.ll b/llvm/test/CodeGen/X86/musttail-indirect.ll

index c142ffa..285ad9d 100644 (file)
--- a/llvm/test/CodeGen/X86/musttail-indirect.ll
+++ b/llvm/test/CodeGen/X86/musttail-indirect.ll
@@ -22,6 +22,8 @@
  ; Each member pointer creates a thunk.  The ones with inalloca are required to
  ; tail calls by the ABI, even at O0.
  
+; TODO: add tests for preallocated/musttail once supported
+
  %struct.B = type { i32 (...)** }
  %struct.A = type { i32 }
  
diff --git a/llvm/test/CodeGen/X86/musttail-thiscall.ll b/llvm/test/CodeGen/X86/musttail-thiscall.ll

index a1ddbd5..5cc8faa 100644 (file)
--- a/llvm/test/CodeGen/X86/musttail-thiscall.ll
+++ b/llvm/test/CodeGen/X86/musttail-thiscall.ll
@@ -1,6 +1,8 @@
  ; RUN: llc -verify-machineinstrs -mtriple=i686-- < %s | FileCheck %s
  ; RUN: llc -verify-machineinstrs -mtriple=i686-- -O0 < %s | FileCheck %s
  
+; TODO: add tests for preallocated/musttail once supported
+
  ; CHECK-LABEL: t1:
  ; CHECK: jmp {{_?}}t1_callee
  define x86_thiscallcc void @t1(i8* %this) {
diff --git a/llvm/test/CodeGen/X86/preallocated-nocall.ll b/llvm/test/CodeGen/X86/preallocated-nocall.ll

new file mode 100644 (file)

index 0000000..8744676
--- /dev/null
+++ b/llvm/test/CodeGen/X86/preallocated-nocall.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+; XFAIL: *
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare void @init(%Foo*)
+
+
+
+declare void @foo_p(%Foo* preallocated(%Foo))
+
+define void @no_call() {
+; CHECK-LABEL: _no_call:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+  call void @init(%Foo* %b)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/preallocated-x64.ll b/llvm/test/CodeGen/X86/preallocated-x64.ll

new file mode 100644 (file)

index 0000000..146bfd7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/preallocated-x64.ll
@@ -0,0 +1,17 @@
+; RUN: llc %s -mtriple=x86_64-windows-msvc -o /dev/null 2>&1
+; XFAIL: *
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare x86_thiscallcc void @f(i32, %Foo* preallocated(%Foo))
+
+define void @g() {
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+  call void @f(i32 0, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/preallocated.ll b/llvm/test/CodeGen/X86/preallocated.ll

new file mode 100644 (file)

index 0000000..8de6988
--- /dev/null
+++ b/llvm/test/CodeGen/X86/preallocated.ll
@@ -0,0 +1,187 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare void @init(%Foo*)
+
+
+
+declare void @foo_p(%Foo* preallocated(%Foo))
+
+define void @one_preallocated() {
+; CHECK-LABEL: _one_preallocated:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: calll _foo_p
+  call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+define void @one_preallocated_two_blocks() {
+; CHECK-LABEL: _one_preallocated_two_blocks:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  br label %second
+second:
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: calll _foo_p
+  call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+define void @preallocated_with_store() {
+; CHECK-LABEL: _preallocated_with_store:
+; CHECK: subl $8, %esp
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+  %p0 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
+  %p1 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
+  store i32 13, i32* %p0
+  store i32 42, i32* %p1
+; CHECK-DAG: movl $13, ([[REGISTER]])
+; CHECK-DAG: movl $42, 4([[REGISTER]])
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_p
+  call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+define void @preallocated_with_init() {
+; CHECK-LABEL: _preallocated_with_init:
+; CHECK: subl $8, %esp
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: pushl [[REGISTER]]
+; CHECK: calll _init
+  call void @init(%Foo* %b)
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_p
+  call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+declare void @foo_p_p(%Foo* preallocated(%Foo), %Foo* preallocated(%Foo))
+
+define void @two_preallocated() {
+; CHECK-LABEL: _two_preallocated:
+  %t = call token @llvm.call.preallocated.setup(i32 2)
+  %a1 = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b1 = bitcast i8* %a1 to %Foo*
+  %a2 = call i8* @llvm.call.preallocated.arg(token %t, i32 1) preallocated(%Foo)
+  %b2 = bitcast i8* %a2 to %Foo*
+; CHECK: subl $16, %esp
+; CHECK: calll _foo_p_p
+  call void @foo_p_p(%Foo* preallocated(%Foo) %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t)]
+  ret void
+}
+
+declare void @foo_p_int(%Foo* preallocated(%Foo), i32)
+
+define void @one_preallocated_one_normal() {
+; CHECK-LABEL: _one_preallocated_one_normal:
+; CHECK: subl $12, %esp
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: pushl [[REGISTER]]
+; CHECK: calll _init
+  call void @init(%Foo* %b)
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: movl $2, 8(%esp)
+; CHECK: calll _foo_p_int
+  call void @foo_p_int(%Foo* preallocated(%Foo) %b, i32 2) ["preallocated"(token %t)]
+  ret void
+}
+
+declare void @foo_ret_p(%Foo* sret, %Foo* preallocated(%Foo))
+
+define void @nested_with_init() {
+; CHECK-LABEL: _nested_with_init:
+  %tmp = alloca %Foo
+
+  %t1 = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: subl $12, %esp
+  %a1 = call i8* @llvm.call.preallocated.arg(token %t1, i32 0) preallocated(%Foo)
+  %b1 = bitcast i8* %a1 to %Foo*
+; CHECK: leal 4(%esp), [[REGISTER1:%[a-z]+]]
+
+  %t2 = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: subl $12, %esp
+  %a2 = call i8* @llvm.call.preallocated.arg(token %t2, i32 0) preallocated(%Foo)
+; CHECK: leal 4(%esp), [[REGISTER2:%[a-z]+]]
+  %b2 = bitcast i8* %a2 to %Foo*
+
+  call void @init(%Foo* %b2)
+; CHECK: pushl [[REGISTER2]]
+; CHECK: calll _init
+
+  call void @foo_ret_p(%Foo* %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t2)]
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_ret_p
+  call void @foo_ret_p(%Foo* %tmp, %Foo* preallocated(%Foo) %b1) ["preallocated"(token %t1)]
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_ret_p
+  ret void
+}
+
+declare void @foo_inreg_p(i32 inreg, %Foo* preallocated(%Foo))
+
+define void @inreg() {
+; CHECK-LABEL: _inreg:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: movl $9, %eax
+; CHECK: calll _foo_inreg_p
+  call void @foo_inreg_p(i32 9, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+declare x86_thiscallcc void @foo_thiscall_p(i8*, %Foo* preallocated(%Foo))
+
+define void @thiscall() {
+; CHECK-LABEL: _thiscall:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: xorl %ecx, %ecx
+; CHECK: calll _foo_thiscall_p
+  call x86_thiscallcc void @foo_thiscall_p(i8* null, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+declare x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo))
+declare x86_stdcallcc void @i(i32)
+
+define void @stdcall() {
+; CHECK-LABEL: _stdcall:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: calll _foo_stdcall_p@8
+  call x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+; CHECK-NOT: %esp
+; CHECK: pushl
+; CHECK: calll _i@4
+  call x86_stdcallcc void @i(i32 0)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll b/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll

index 1364732..32e046d 100644 (file)
--- a/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll
+++ b/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll
@@ -1,5 +1,8 @@
  ; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s
  
+; TODO: Add preallocated versions of these tests. Preallocated calls that are
+; only conditionally executed after their setup are not supported yet.
+
  ; chkstk cannot come before the usual prologue, since it adjusts ESP.
  ; If chkstk is used in the prologue, we also have to be careful about preserving
  ; EAX if it is used.
diff --git a/llvm/test/CodeGen/X86/tail-call-mutable-memarg.ll b/llvm/test/CodeGen/X86/tail-call-mutable-memarg.ll

index 491bbba..34db632 100644 (file)
--- a/llvm/test/CodeGen/X86/tail-call-mutable-memarg.ll
+++ b/llvm/test/CodeGen/X86/tail-call-mutable-memarg.ll
@@ -9,6 +9,21 @@ target triple = "i386-pc-windows-msvc19.0.24215"
  declare x86_stdcallcc void @tail_std(i32)
  declare void @capture(i32*)
  
+define x86_thiscallcc void @preallocated(i32* %this, i32* preallocated(i32) %args) {
+entry:
+  %val = load i32, i32* %args
+  store i32 0, i32* %args
+  tail call x86_stdcallcc void @tail_std(i32 %val)
+  ret void
+}
+
+; CHECK-LABEL: _preallocated:                              # @preallocated
+; CHECK:         movl    4(%esp), %[[reg:[^ ]*]]
+; CHECK:         movl    $0, 4(%esp)
+; CHECK:         pushl   %[[reg]]
+; CHECK:         calll   _tail_std@4
+; CHECK:         retl    $4
+
  define x86_thiscallcc void @inalloca(i32* %this, i32* inalloca %args) {
  entry:
    %val = load i32, i32* %args
diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll

index 9155911..ac0fcae 100644 (file)
--- a/llvm/test/Transforms/Attributor/value-simplify.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify.ll
@@ -6,6 +6,8 @@
  
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  declare void @f(i32)
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
  
  ; Test1: Replace argument with constant
  define internal void @test1(i32 %a) {
@@ -280,6 +282,24 @@ define i32* @complicated_args_inalloca() {
    ret i32* %call
  }
  
+define internal i32* @test_preallocated(i32* preallocated(i32) %a) {
+; CHECK-LABEL: define {{[^@]+}}@test_preallocated
+; CHECK-SAME: (i32* noalias nofree returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]])
+; CHECK-NEXT:    ret i32* [[A]]
+;
+  ret i32* %a
+}
+define i32* @complicated_args_preallocated() {
+; CHECK-LABEL: define {{[^@]+}}@complicated_args_preallocated()
+; CHECK-NEXT:    [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK-NEXT:    [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null)
+; CHECK-NEXT:    ret i32* [[CALL]]
+;
+  %c = call token @llvm.call.preallocated.setup(i32 1)
+  %call = call i32* @test_preallocated(i32* preallocated(i32) null) ["preallocated"(token %c)]
+  ret i32* %call
+}
+
  define internal void @test_sret(%struct.X* sret %a, %struct.X** %b) {
  ;
  ; CHECK-LABEL: define {{[^@]+}}@test_sret
diff --git a/llvm/test/Transforms/DeadArgElim/keepalive.ll b/llvm/test/Transforms/DeadArgElim/keepalive.ll

index d8a0993..c53ee76 100644 (file)
--- a/llvm/test/Transforms/DeadArgElim/keepalive.ll
+++ b/llvm/test/Transforms/DeadArgElim/keepalive.ll
@@ -1,5 +1,8 @@
  ; RUN: opt < %s -deadargelim -S | FileCheck %s
  
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
  %Ty = type <{ i32, i32 }>
  
  ; Check if the pass doesn't modify anything that doesn't need changing. We feed
@@ -44,4 +47,22 @@ define i32 @caller2() {
         ret i32 %v
  }
  
+; We can't remove 'this' here, as that would put argmem in ecx instead of
+; memory.
+define internal x86_thiscallcc i32 @unused_this_preallocated(i32* %this, i32* preallocated(i32) %argmem) {
+       %v = load i32, i32* %argmem
+       ret i32 %v
+}
+; CHECK-LABEL: define internal x86_thiscallcc i32 @unused_this_preallocated(i32* %this, i32* preallocated(i32) %argmem)
+
+define i32 @caller3() {
+       %t = alloca i32
+       %c = call token @llvm.call.preallocated.setup(i32 1)
+       %M = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
+       %m = bitcast i8* %M to i32*
+       store i32 42, i32* %m
+       %v = call x86_thiscallcc i32 @unused_this_preallocated(i32* %t, i32* preallocated(i32) %m) ["preallocated"(token %c)]
+       ret i32 %v
+}
+
  ; CHECK: attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll

index d0a3cef..7e21218 100644 (file)
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
@@ -58,6 +58,16 @@ define void @test9_2(%struct.x* inalloca  %a) nounwind  {
    ret void
  }
  
+; Test for preallocated handling.
+define void @test9_3(%struct.x* preallocated(%struct.x)  %a) nounwind  {
+; CHECK-LABEL: @test9_3(
+; CHECK-NEXT:    ret void
+;
+  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
+  store i32 1, i32* %tmp2, align 4
+  ret void
+}
+
  ; DSE should delete the dead trampoline.
  declare void @test11f()
  define void @test11() {
diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll

index ee902a0..9c381fc 100644 (file)
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -169,6 +169,16 @@ define void @test9_2(%struct.x* inalloca  %a) nounwind  {
    ret void
  }
  
+; Test for preallocated handling.
+define void @test9_3(%struct.x* preallocated(%struct.x)  %a) nounwind  {
+; CHECK-LABEL: @test9_3(
+; CHECK-NEXT:    ret void
+;
+  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
+  store i32 1, i32* %tmp2, align 4
+  ret void
+}
+
  ; va_arg has fuzzy dependence, the store shouldn't be zapped.
  define double @test10(i8* %X) {
  ; CHECK-LABEL: @test10(
diff --git a/llvm/test/Transforms/FunctionAttrs/readattrs.ll b/llvm/test/Transforms/FunctionAttrs/readattrs.ll

index b11b3ed..e566c96 100644 (file)
--- a/llvm/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/llvm/test/Transforms/FunctionAttrs/readattrs.ll
@@ -56,6 +56,12 @@ define void @test7_1(i32* inalloca %a) {
    ret void
  }
  
+; CHECK: define void @test7_2(i32* nocapture preallocated(i32) %a)
+; preallocated parameters are always considered written
+define void @test7_2(i32* preallocated(i32) %a) {
+  ret void
+}
+
  ; CHECK: define i32* @test8_1(i32* readnone returned %p)
  define i32* @test8_1(i32* %p) {
  entry:
diff --git a/llvm/test/Transforms/GlobalOpt/fastcc.ll b/llvm/test/Transforms/GlobalOpt/fastcc.ll

index 39542d0..7bf3e97 100644 (file)
--- a/llvm/test/Transforms/GlobalOpt/fastcc.ll
+++ b/llvm/test/Transforms/GlobalOpt/fastcc.ll
@@ -1,5 +1,8 @@
  ; RUN: opt < %s -globalopt -S | FileCheck %s
  
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
  define internal i32 @f(i32* %m) {
  ; CHECK-LABEL: define internal fastcc i32 @f
    %v = load i32, i32* %m
@@ -32,6 +35,13 @@ define internal i32 @inalloca(i32* inalloca %p) {
    ret i32 %rv
  }
  
+define internal i32 @preallocated(i32* preallocated(i32) %p) {
+; TODO: handle preallocated:
+; CHECK-NOT-LABEL: define internal fastcc i32 @preallocated(i32* %p)
+  %rv = load i32, i32* %p
+  ret i32 %rv
+}
+
  define void @call_things() {
    %m = alloca i32
    call i32 @f(i32* %m)
@@ -40,6 +50,11 @@ define void @call_things() {
    call i32 @j(i32* %m)
    %args = alloca inalloca i32
    call i32 @inalloca(i32* inalloca %args)
+  ; TODO: handle preallocated
+  ;%c = call token @llvm.call.preallocated.setup(i32 1)
+  ;%N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
+  ;%n = bitcast i8* %N to i32*
+   ;call i32 @preallocated(i32* preallocated(i32) %n) ["preallocated"(token %c)]
    ret void
  }
  
diff --git a/llvm/test/Transforms/InstCombine/call-cast-target-preallocated.ll b/llvm/test/Transforms/InstCombine/call-cast-target-preallocated.ll

new file mode 100644 (file)

index 0000000..fc96a16
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/call-cast-target-preallocated.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-win32"
+
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+declare void @takes_i32(i32)
+declare void @takes_i32_preallocated(i32* preallocated(i32))
+
+define void @f() {
+; CHECK-LABEL: define void @f()
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(i32)
+  %arg = bitcast i8* %a to i32*
+  call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* preallocated(i32) %arg) ["preallocated"(token %t)]
+; CHECK: call void bitcast{{.*}}@takes_i32
+  ret void
+}
+
+define void @g() {
+; CHECK-LABEL: define void @g()
+  call void bitcast (void (i32*)* @takes_i32_preallocated to void (i32)*)(i32 0)
+; CHECK: call void bitcast{{.*}}@takes_i32_preallocated
+  ret void
+}
author	Arthur Eubanks <aeubanks@google.com>
	Mon, 16 Mar 2020 19:32:36 +0000 (12:32 -0700)
committer	Arthur Eubanks <aeubanks@google.com>
	Wed, 20 May 2020 16:20:38 +0000 (09:20 -0700)
llvm/include/llvm/CodeGen/ISDOpcodes.h		patch \| blob \| history
llvm/include/llvm/CodeGen/TargetCallingConv.h		patch \| blob \| history
llvm/include/llvm/CodeGen/TargetLowering.h		patch \| blob \| history
llvm/include/llvm/IR/Argument.h		patch \| blob \| history
llvm/include/llvm/IR/Attributes.h		patch \| blob \| history
llvm/include/llvm/IR/InstrTypes.h		patch \| blob \| history
llvm/include/llvm/Support/TargetOpcodes.def		patch \| blob \| history
llvm/include/llvm/Target/Target.td		patch \| blob \| history
llvm/include/llvm/Target/TargetCallingConv.td		patch \| blob \| history
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/FastISel.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
llvm/lib/IR/Attributes.cpp		patch \| blob \| history
llvm/lib/IR/Function.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86CallingConv.td		patch \| blob \| history
llvm/lib/Target/X86/X86FastISel.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86FrameLowering.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86MachineFunctionInfo.h		patch \| blob \| history
llvm/lib/Target/X86/X86RegisterInfo.cpp		patch \| blob \| history
llvm/lib/Transforms/Coroutines/CoroSplit.cpp		patch \| blob \| history
llvm/lib/Transforms/IPO/Attributor.cpp		patch \| blob \| history
llvm/lib/Transforms/IPO/AttributorAttributes.cpp		patch \| blob \| history
llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp		patch \| blob \| history
llvm/lib/Transforms/IPO/FunctionAttrs.cpp		patch \| blob \| history
llvm/lib/Transforms/IPO/GlobalOpt.cpp		patch \| blob \| history
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/arg-copy-elide.ll		patch \| blob \| history
llvm/test/CodeGen/X86/musttail-indirect.ll		patch \| blob \| history
llvm/test/CodeGen/X86/musttail-thiscall.ll		patch \| blob \| history
llvm/test/CodeGen/X86/preallocated-nocall.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/X86/preallocated-x64.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/X86/preallocated.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll		patch \| blob \| history
llvm/test/CodeGen/X86/tail-call-mutable-memarg.ll		patch \| blob \| history
llvm/test/Transforms/Attributor/value-simplify.ll		patch \| blob \| history
llvm/test/Transforms/DeadArgElim/keepalive.ll		patch \| blob \| history
llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll		patch \| blob \| history
llvm/test/Transforms/DeadStoreElimination/simple.ll		patch \| blob \| history
llvm/test/Transforms/FunctionAttrs/readattrs.ll		patch \| blob \| history
llvm/test/Transforms/GlobalOpt/fastcc.ll		patch \| blob \| history
llvm/test/Transforms/InstCombine/call-cast-target-preallocated.ll	[new file with mode: 0644]	patch \| blob