VAEND,
VASTART,
+ // PREALLOCATED_SETUP - This has 2 operands: an input chain and an SRCVALUE
+ // with the preallocated call Value.
+ PREALLOCATED_SETUP,
+ // PREALLOCATED_ARG - This has 3 operands: an input chain, an SRCVALUE
+ // with the preallocated call Value, and a constant i32 holding the index of
+ // the preallocated argument.
+ PREALLOCATED_ARG,
+
/// SRCVALUE - This is a node type that holds a Value* that is used to
/// make reference to a value in the LLVM IR.
SRCVALUE,
unsigned IsReturned : 1; ///< Always returned
unsigned IsSplit : 1;
unsigned IsInAlloca : 1; ///< Passed with inalloca
+ unsigned IsPreallocated : 1; ///< ByVal without the copy
unsigned IsSplitEnd : 1; ///< Last part of a split
unsigned IsSwiftSelf : 1; ///< Swift self parameter
unsigned IsSwiftError : 1; ///< Swift error parameter
public:
ArgFlagsTy()
: IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0),
- IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0),
- IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0), IsHva(0),
- IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
+ IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0),
+ IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0),
+ IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0),
PointerAddrSpace(0) {
bool isInAlloca() const { return IsInAlloca; }
void setInAlloca() { IsInAlloca = 1; }
+ bool isPreallocated() const { return IsPreallocated; }
+ void setPreallocated() { IsPreallocated = 1; }
+
bool isSwiftSelf() const { return IsSwiftSelf; }
void setSwiftSelf() { IsSwiftSelf = 1; }
bool IsNest : 1;
bool IsByVal : 1;
bool IsInAlloca : 1;
+ bool IsPreallocated : 1;
bool IsReturned : 1;
bool IsSwiftSelf : 1;
bool IsSwiftError : 1;
bool IsCFGuardTarget : 1;
MaybeAlign Alignment = None;
Type *ByValType = nullptr;
+ Type *PreallocatedType = nullptr;
ArgListEntry()
: IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
- IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
- IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {}
+ IsNest(false), IsByVal(false), IsInAlloca(false),
+ IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
+ IsSwiftError(false), IsCFGuardTarget(false) {}
void setAttributes(const CallBase *Call, unsigned ArgIdx);
};
bool IsReturnValueUsed : 1;
bool IsConvergent : 1;
bool IsPatchPoint : 1;
+ bool IsPreallocated : 1;
// IsTailCall should be modified by implementations of
// TargetLowering::LowerCall that perform tail call conversions.
CallLoweringInfo(SelectionDAG &DAG)
: RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
- IsPatchPoint(false), DAG(DAG) {}
+ IsPatchPoint(false), IsPreallocated(false), DAG(DAG) {}
CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
DL = dl;
return *this;
}
+ CallLoweringInfo &setIsPreallocated(bool Value = true) {
+ IsPreallocated = Value;
+ return *this;
+ }
+
CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
IsPostTypeLegalization = Value;
return *this;
/// Return true if this argument has the inalloca attribute.
bool hasInAllocaAttr() const;
+ /// Return true if this argument has the preallocated attribute.
+ bool hasPreallocatedAttr() const;
+
/// Return true if this argument has the zext attribute.
bool hasZExtAttr() const;
/// Return the byval type for the specified function parameter.
Type *getParamByValType(unsigned ArgNo) const;
+ /// Return the preallocated type for the specified function parameter.
+ Type *getParamPreallocatedType(unsigned ArgNo) const;
+
/// Get the stack alignment.
MaybeAlign getStackAlignment(unsigned Index) const;
return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
}
+ /// Extract the preallocated type for a call or parameter.
+ Type *getParamPreallocatedType(unsigned ArgNo) const {
+ Type *Ty = Attrs.getParamPreallocatedType(ArgNo);
+ return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
+ }
+
/// Extract the number of dereferenceable bytes for a call or
/// parameter (0=unknown).
uint64_t getDereferenceableBytes(unsigned i) const {
/// additionally expand this pseudo after register allocation.
HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD)
+/// These are used to support call sites that must have the stack adjusted
+/// before the call (e.g. to initialize an argument passed by value).
+/// See llvm.call.preallocated.{setup,arg} in the LangRef for more details.
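+/// For example, IR like the following (names illustrative, mirroring the
+/// tests in this patch)
+///   %t = call token @llvm.call.preallocated.setup(i32 1)
+///   %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+/// selects to PREALLOCATED_SETUP and PREALLOCATED_ARG respectively.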
+HANDLE_TARGET_OPCODE(PREALLOCATED_SETUP)
+HANDLE_TARGET_OPCODE(PREALLOCATED_ARG)
+
/// Call instruction with associated vm state for deoptimization and list
/// of live pointers for relocation by the garbage collector. It is
/// intended to support garbage collection with fully precise relocating
let hasSideEffects = 0;
bit isPseudo = 1;
}
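+// Pseudos for the llvm.call.preallocated.{setup,arg} intrinsics; both are
+// expanded by a custom inserter. $a is the target-assigned id of the
+// preallocated call site and $b is the index of the argument within it.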
+def PREALLOCATED_SETUP : StandardPseudoInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins i32imm:$a);
+ let usesCustomInserter = 1;
+ let hasSideEffects = 1;
+}
+def PREALLOCATED_ARG : StandardPseudoInstruction {
+ let OutOperandList = (outs ptr_rc:$loc);
+ let InOperandList = (ins i32imm:$a, i32imm:$b);
+ let usesCustomInserter = 1;
+ let hasSideEffects = 1;
+}
def LOCAL_ESCAPE : StandardPseudoInstruction {
// This instruction is really just a label. It has to be part of the chain so
// that it doesn't get dropped from the DAG, but it produces nothing and has
class CCIfByVal<CCAction A> : CCIf<"ArgFlags.isByVal()", A> {
}
+/// CCIfPreallocated - If the current argument has the preallocated parameter
+/// attribute, apply Action A.
+class CCIfPreallocated<CCAction A> : CCIf<"ArgFlags.isPreallocated()", A> {
+}
+
/// CCIfSwiftSelf - If the current argument has swiftself parameter attribute,
/// apply Action A.
class CCIfSwiftSelf<CCAction A> : CCIf<"ArgFlags.isSwiftSelf()", A> {
Flags.setSwiftError();
if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
Flags.setByVal();
+ if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated))
+ Flags.setPreallocated();
if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
Flags.setInAlloca();
- if (Flags.isByVal() || Flags.isInAlloca()) {
+ if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
// the various CC lowering callbacks.
Flags.setByVal();
}
- if (Arg.IsByVal || Arg.IsInAlloca) {
+ if (Arg.IsPreallocated) {
+ Flags.setPreallocated();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // preallocated. This way we can know how many bytes we should've
+ // allocated and how many bytes a callee cleanup function will pop. If we
+ // port preallocated to more targets, we'll have to add custom
+ // preallocated handling in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) {
PointerType *Ty = cast<PointerType>(Arg.Ty);
Type *ElementTy = Ty->getElementType();
unsigned FrameSize =
}
SDValue SelectionDAG::getSrcValue(const Value *V) {
- assert((!V || V->getType()->isPointerTy()) &&
- "SrcValue is not a pointer?");
-
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
ID.AddPointer(V);
LowerCallTo(I, Callee, I.isTailCall());
}
+/// Given a @llvm.call.preallocated.setup, return the corresponding
+/// preallocated call.
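+/// For example, given IR like (names illustrative)
+///   %t = call token @llvm.call.preallocated.setup(i32 1)
+///   %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+///   %b = bitcast i8* %a to %Foo*
+///   call void @f(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+/// this returns the call to @f, i.e. the one user of the setup token that is
+/// not itself a call to @llvm.call.preallocated.arg.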
+static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
+ assert(cast<CallBase>(PreallocatedSetup)
+ ->getCalledFunction()
+ ->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
+ "expected call_preallocated_setup Value");
+ for (auto *U : PreallocatedSetup->users()) {
+ auto *UseCall = cast<CallBase>(U);
+ const Function *Fn = UseCall->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
+ return UseCall;
+ }
+ }
+ llvm_unreachable("expected corresponding call to preallocated setup/arg");
+}
+
/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
updateDAGForMaybeTailCall(MC);
return;
}
+ case Intrinsic::call_preallocated_setup: {
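+ // Lower to a PREALLOCATED_SETUP node carrying the SRCVALUE of the
+ // corresponding preallocated call so instruction selection can later map
+ // the setup back to that call site.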
+ const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
+ SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
+ SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
+ getRoot(), SrcValue);
+ setValue(&I, Res);
+ DAG.setRoot(Res);
+ return;
+ }
+ case Intrinsic::call_preallocated_arg: {
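+ // Lower to PREALLOCATED_ARG, which produces the pointer to this argument's
+ // slot in the preallocated stack area along with an output chain.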
+ const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
+ SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
+ SDValue Ops[3];
+ Ops[0] = getRoot();
+ Ops[1] = SrcValue;
+ Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
+ MVT::i32); // arg index
+ SDValue Res = DAG.getNode(
+ ISD::PREALLOCATED_ARG, sdl,
+ DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
const auto &DI = cast<DbgVariableIntrinsic>(I);
.setChain(getRoot())
.setCallee(RetTy, FTy, Callee, std::move(Args), CB)
.setTailCall(isTailCall)
- .setConvergent(CB.isConvergent());
+ .setConvergent(CB.isConvergent())
+ .setIsPreallocated(
+ CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode()) {
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
// CFGuardTarget bundles are lowered in LowerCallTo.
- assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
- LLVMContext::OB_funclet,
- LLVMContext::OB_cfguardtarget}) &&
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
+ LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledOperand());
.setChain(getRoot())
.setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
.setDiscardResult(Call->use_empty())
- .setIsPatchPoint(IsPatchPoint);
+ .setIsPatchPoint(IsPatchPoint)
+ .setIsPreallocated(
+ Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
}
/// Add a stack map intrinsic call's live variable operands to a stackmap
Flags.setCFGuardTarget();
if (Args[i].IsByVal)
Flags.setByVal();
+ if (Args[i].IsPreallocated) {
+ Flags.setPreallocated();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // preallocated. This way we can know how many bytes we should've
+ // allocated and how many bytes a callee cleanup function will pop. If
+ // we port preallocated to more targets, we'll have to add custom
+ // preallocated handling in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
if (Args[i].IsInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// in the various CC lowering callbacks.
Flags.setByVal();
}
- if (Args[i].IsByVal || Args[i].IsInAlloca) {
+ if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
// initializes the alloca. Don't elide copies from the same argument twice.
const Value *Val = SI->getValueOperand()->stripPointerCasts();
const auto *Arg = dyn_cast<Argument>(Val);
- if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
+ if (!Arg || Arg->hasPassPointeeByValueAttr() ||
Arg->getType()->isEmptyTy() ||
DL.getTypeStoreSize(Arg->getType()) !=
DL.getTypeAllocSize(AI->getAllocatedType()) ||
// in the various CC lowering callbacks.
Flags.setByVal();
}
+ if (Arg.hasAttribute(Attribute::Preallocated)) {
+ Flags.setPreallocated();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // preallocated. This way we can know how many bytes we should've
+ // allocated and how many bytes a callee cleanup function will pop. If
+ // we port preallocated to more targets, we'll have to add custom
+ // preallocated handling in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
if (F.getCallingConv() == CallingConv::X86_INTR) {
// IA Interrupt passes frame (1st parameter) by value in the stack.
if (ArgNo == 0)
Flags.setByVal();
}
- if (Flags.isByVal() || Flags.isInAlloca()) {
+ if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
Type *ElementTy = Arg.getParamByValType();
// For ByVal, size and alignment should be passed from FE. BE will
case ISD::GC_TRANSITION_END: return "gc_transition.end";
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
case ISD::FREEZE: return "freeze";
+ case ISD::PREALLOCATED_SETUP:
+ return "call_setup";
+ case ISD::PREALLOCATED_ARG:
+ return "call_alloc";
// Bit manipulation
case ISD::ABS: return "abs";
IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
+ IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
Alignment = Call->getParamAlign(ArgIdx);
ByValType = nullptr;
- if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
+ if (IsByVal)
ByValType = Call->getParamByValType(ArgIdx);
+ PreallocatedType = nullptr;
+ if (IsPreallocated)
+ PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
return getAttributes(Index+FirstArgIndex).getByValType();
}
+Type *AttributeList::getParamPreallocatedType(unsigned Index) const {
+ return getAttributes(Index + FirstArgIndex).getPreallocatedType();
+}
+
MaybeAlign AttributeList::getStackAlignment(unsigned Index) const {
return getAttributes(Index).getStackAlignment();
}
return hasAttribute(Attribute::InAlloca);
}
+bool Argument::hasPreallocatedAttr() const {
+ if (!getType()->isPointerTy())
+ return false;
+ return hasAttribute(Attribute::Preallocated);
+}
+
bool Argument::hasPassPointeeByValueAttr() const {
if (!getType()->isPointerTy()) return false;
AttributeList Attrs = getParent()->getAttributes();
/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
/// values are spilled on the stack.
def CC_X86_32_Common : CallingConv<[
- // Handles byval parameters.
+ // Handles byval/preallocated parameters.
CCIfByVal<CCPassByVal<4, 4>>,
+ CCIfPreallocated<CCPassByVal<4, 4>>,
// The first 3 float or double arguments, if marked 'inreg' and if the call
// is not a vararg call and if SSE2 is available, are passed in SSE registers.
return false;
for (auto Flag : CLI.OutFlags)
- if (Flag.isSwiftError())
+ if (Flag.isSwiftError() || Flag.isPreallocated())
return false;
SmallVector<MVT, 16> OutVTs;
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo().hasVarSizedObjects() &&
- !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
+ !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
+ !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}
/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
bool
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) ||
+ MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
(hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
TRI->hasBasePointer(MF);
}
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
- TRI->needsStackRealignment(MF) ||
- MFI.hasVarSizedObjects() ||
+ TRI->needsStackRealignment(MF) || MFI.hasVarSizedObjects() ||
MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
MFI.hasCopyImplyingStackAdjustment());
CurDAG->RemoveDeadNode(Node);
return;
}
+ case ISD::PREALLOCATED_SETUP: {
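+ // Translate the SRCVALUE of the preallocated call into the per-function id
+ // recorded in X86MachineFunctionInfo, then emit the target pseudo.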
+ auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ auto CallId = MFI->getPreallocatedIdForCallSite(
+ cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
+ SDValue Chain = Node->getOperand(0);
+ SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
+ MachineSDNode *New = CurDAG->getMachineNode(
+ TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain);
+ ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ case ISD::PREALLOCATED_ARG: {
+ auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ auto CallId = MFI->getPreallocatedIdForCallSite(
+ cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
+ SDValue Chain = Node->getOperand(0);
+ SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
+ SDValue ArgIndex = Node->getOperand(2);
+ SDValue Ops[3];
+ Ops[0] = CallIdValue;
+ Ops[1] = ArgIndex;
+ Ops[2] = Chain;
+ MachineSDNode *New = CurDAG->getMachineNode(
+ TargetOpcode::PREALLOCATED_ARG, dl,
+ CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
+ MVT::Other),
+ Ops);
+ ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
+ ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
}
SelectCode(Node);
if (ArgLocs.back().getLocMemOffset() != 0)
report_fatal_error("any parameter with the inalloca attribute must be "
"the only memory argument");
+ } else if (CLI.IsPreallocated) {
+ assert(ArgLocs.back().isMemLoc() &&
+ "cannot use preallocated attribute on a register "
+ "parameter");
+ SmallVector<size_t, 4> PreallocatedOffsets;
+ for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
+ if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
+ PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
+ }
+ }
+ auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
+ MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
+ MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
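+ // The stack was already adjusted by the corresponding PREALLOCATED_SETUP,
+ // so there is nothing left to push for these arguments.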
+ NumBytesToPush = 0;
}
if (!IsSibcall && !IsMustTail)
for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
++I, ++OutIndex) {
assert(OutIndex < Outs.size() && "Invalid Out index");
- // Skip inalloca arguments, they have already been written.
+ // Skip inalloca/preallocated arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
- if (Flags.isInAlloca())
+ if (Flags.isInAlloca() || Flags.isPreallocated())
continue;
CCValAssign &VA = ArgLocs[I];
assert(VA.isMemLoc());
SDValue Arg = OutVals[OutsIndex];
ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
- // Skip inalloca arguments. They don't require any work.
- if (Flags.isInAlloca())
+ // Skip inalloca/preallocated arguments. They don't require any work.
+ if (Flags.isInAlloca() || Flags.isPreallocated())
continue;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
BB->addLiveIn(BasePtr);
return BB;
}
+ case TargetOpcode::PREALLOCATED_SETUP: {
+ assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
+ auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+ MFI->setHasPreallocatedCall(true);
+ int64_t PreallocatedId = MI.getOperand(0).getImm();
+ size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
+ assert(StackAdjustment != 0 && "0 stack adjustment");
+ LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
+ << StackAdjustment << "\n");
+ BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
+ .addReg(X86::ESP)
+ .addImm(StackAdjustment);
+ MI.eraseFromParent();
+ return BB;
+ }
+ case TargetOpcode::PREALLOCATED_ARG: {
+ assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
+ int64_t PreallocatedId = MI.getOperand(1).getImm();
+ int64_t ArgIdx = MI.getOperand(2).getImm();
+ auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+ size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
+ LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
+ << ", arg offset " << ArgOffset << "\n");
+ // stack pointer + offset
+ addRegOffset(
+ BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
+ X86::ESP, false, ArgOffset);
+ MI.eraseFromParent();
+ return BB;
+ }
}
}
#ifndef LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
/// True if this function has WIN_ALLOCA instructions.
bool HasWinAlloca = false;
+ /// True if this function has any preallocated calls.
+ bool HasPreallocatedCall = false;
+
+ ValueMap<const Value *, size_t> PreallocatedIds;
+ SmallVector<size_t, 0> PreallocatedStackSizes;
+ SmallVector<SmallVector<size_t, 4>, 0> PreallocatedArgOffsets;
+
private:
/// ForwardedMustTailRegParms - A list of virtual and physical registers
/// that must be forwarded to every musttail call.
bool hasWinAlloca() const { return HasWinAlloca; }
void setHasWinAlloca(bool v) { HasWinAlloca = v; }
+
+ bool hasPreallocatedCall() const { return HasPreallocatedCall; }
+ void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
+
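+ // Returns a dense id for the given preallocated call site, allocating the
+ // id (and its stack size / arg offset slots) on first use.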
+ size_t getPreallocatedIdForCallSite(const Value *CS) {
+ auto Insert = PreallocatedIds.insert({CS, PreallocatedIds.size()});
+ if (Insert.second) {
+ PreallocatedStackSizes.push_back(0);
+ PreallocatedArgOffsets.emplace_back();
+ }
+ return Insert.first->second;
+ }
+
+ void setPreallocatedStackSize(size_t Id, size_t StackSize) {
+ PreallocatedStackSizes[Id] = StackSize;
+ }
+
+ size_t getPreallocatedStackSize(const size_t Id) {
+ assert(PreallocatedStackSizes[Id] != 0 && "stack size not set");
+ return PreallocatedStackSizes[Id];
+ }
+
+ void setPreallocatedArgOffsets(size_t Id, ArrayRef<size_t> AO) {
+ PreallocatedArgOffsets[Id].assign(AO.begin(), AO.end());
+ }
+
+ const ArrayRef<size_t> getPreallocatedArgOffsets(const size_t Id) {
+ assert(!PreallocatedArgOffsets[Id].empty() && "arg offsets not set");
+ return PreallocatedArgOffsets[Id];
+ }
};
} // End llvm namespace
}
bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
- const MachineFrameInfo &MFI = MF.getFrameInfo();
-
- if (!EnableBasePointer)
- return false;
-
- // When we need stack realignment, we can't address the stack from the frame
- // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
- // can't address variables from the stack pointer. MS inline asm can
- // reference locals while also adjusting the stack pointer. When we can't
- // use both the SP and the FP, we need a separate base pointer register.
- bool CantUseFP = needsStackRealignment(MF);
- return CantUseFP && CantUseSP(MFI);
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
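+ // ESP is decremented by PREALLOCATED_SETUP well before the associated call,
+ // so, as with dynamic allocas, the stack pointer cannot be used to reliably
+ // reference locals; require a base pointer instead.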
+ if (X86FI->hasPreallocatedCall())
+ return true;
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ if (!EnableBasePointer)
+ return false;
+
+ // When we need stack realignment, we can't address the stack from the frame
+ // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
+ // can't address variables from the stack pointer. MS inline asm can
+ // reference locals while also adjusting the stack pointer. When we can't
+ // use both the SP and the FP, we need a separate base pointer register.
+ bool CantUseFP = needsStackRealignment(MF);
+ return CantUseFP && CantUseSP(MFI);
}
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
// CI should not have any ABI-impacting function attributes.
static const Attribute::AttrKind ABIAttrs[] = {
- Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
- Attribute::InReg, Attribute::Returned, Attribute::SwiftSelf,
- Attribute::SwiftError};
+ Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
+ Attribute::Preallocated, Attribute::InReg, Attribute::Returned,
+ Attribute::SwiftSelf, Attribute::SwiftError};
AttributeList Attrs = CI.getAttributes();
for (auto AK : ABIAttrs)
if (Attrs.hasParamAttribute(0, AK))
AttributeList FnAttributeList = Fn->getAttributes();
if (FnAttributeList.hasAttrSomewhere(Attribute::Nest) ||
FnAttributeList.hasAttrSomewhere(Attribute::StructRet) ||
- FnAttributeList.hasAttrSomewhere(Attribute::InAlloca)) {
+ FnAttributeList.hasAttrSomewhere(Attribute::InAlloca) ||
+ FnAttributeList.hasAttrSomewhere(Attribute::Preallocated)) {
LLVM_DEBUG(
dbgs() << "[Attributor] Cannot rewrite due to complex attribute\n");
return false;
AAValueSimplifyImpl::initialize(A);
if (!getAnchorScope() || getAnchorScope()->isDeclaration())
indicatePessimisticFixpoint();
- if (hasAttr({Attribute::InAlloca, Attribute::StructRet, Attribute::Nest},
+ if (hasAttr({Attribute::InAlloca, Attribute::Preallocated,
+ Attribute::StructRet, Attribute::Nest},
/* IgnoreSubsumingPositions */ true))
indicatePessimisticFixpoint();
// TODO: From readattrs.ll: "inalloca parameters are always
// considered written"
- if (hasAttr({Attribute::InAlloca})) {
+ if (hasAttr({Attribute::InAlloca, Attribute::Preallocated})) {
removeKnownBits(NO_WRITES);
removeAssumedBits(NO_WRITES);
}
// We consider arguments of non-internal functions to be intrinsically alive as
// well as arguments to functions which have their "address taken".
void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
- // Functions with inalloca parameters are expecting args in a particular
- // register and memory layout.
- if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) {
+ // Functions with inalloca/preallocated parameters are expecting args in a
+ // particular register and memory layout.
+ if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+ F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
MarkLive(F);
return;
}
SmallPtrSet<Use *, 32> Visited;
- // inalloca arguments are always clobbered by the call.
+ // inalloca/preallocated arguments are always clobbered by the call.
- if (A->hasInAllocaAttr())
+ if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
return Attribute::None;
bool IsRead = false;
// wouldn't be safe in the presence of inalloca.
// FIXME: We should also hoist alloca affected by this to the entry
// block if possible.
+ // FIXME: handle preallocated
if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
!F->hasAddressTaken()) {
RemoveAttribute(F, Attribute::InAlloca);
//
- // Similarly, avoid folding away bitcasts of byval calls.
+ // Similarly, avoid folding away bitcasts of byval/preallocated calls.
if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+ Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated) ||
Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return false;
; CHECK: calll _addrof_i32
; CHECK: retl
+define void @avoid_preallocated(i32* preallocated(i32) %x) {
+entry:
+ %x.p.p = alloca i32*
+ store i32* %x, i32** %x.p.p
+ call void @addrof_i32(i32* %x)
+ ret void
+}
+
+; CHECK-LABEL: _avoid_preallocated:
+; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i32
+; CHECK: retl
+
; Don't elide the copy when the alloca is escaped with a store.
define void @escape_with_store(i32 %x) {
%x1 = alloca i32
; Each member pointer creates a thunk. The ones with inalloca are required to
; be tail calls by the ABI, even at O0.
+; TODO: add tests for preallocated/musttail once supported
+
%struct.B = type { i32 (...)** }
%struct.A = type { i32 }
; RUN: llc -verify-machineinstrs -mtriple=i686-- < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=i686-- -O0 < %s | FileCheck %s
+; TODO: add tests for preallocated/musttail once supported
+
; CHECK-LABEL: t1:
; CHECK: jmp {{_?}}t1_callee
define x86_thiscallcc void @t1(i8* %this) {
--- /dev/null
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+; XFAIL: *
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare void @init(%Foo*)
+
+
+
+declare void @foo_p(%Foo* preallocated(%Foo))
+
+define void @no_call() {
+; CHECK-LABEL: _no_call:
+ %t = call token @llvm.call.preallocated.setup(i32 1)
+ %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+ %b = bitcast i8* %a to %Foo*
+ call void @init(%Foo* %b)
+ ret void
+}
--- /dev/null
+; RUN: llc %s -mtriple=x86_64-windows-msvc -o /dev/null 2>&1
+; XFAIL: *
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare x86_thiscallcc void @f(i32, %Foo* preallocated(%Foo))
+
+define void @g() {
+ %t = call token @llvm.call.preallocated.setup(i32 1)
+ %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+ %b = bitcast i8* %a to %Foo*
+ call void @f(i32 0, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+ ret void
+}
--- /dev/null
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare void @init(%Foo*)
+
+
+
+declare void @foo_p(%Foo* preallocated(%Foo))
+
+define void @one_preallocated() {
+; CHECK-LABEL: _one_preallocated:
+ %t = call token @llvm.call.preallocated.setup(i32 1)
+ %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+ %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: calll _foo_p
+ call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+ ret void
+}
+
+define void @one_preallocated_two_blocks() {
+; CHECK-LABEL: _one_preallocated_two_blocks:
+ %t = call token @llvm.call.preallocated.setup(i32 1)
+ br label %second
+second:
+ %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+ %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: calll _foo_p
+ call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+ ret void
+}
+
+define void @preallocated_with_store() {
+; CHECK-LABEL: _preallocated_with_store:
+; CHECK: subl $8, %esp
+ %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+ %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+ %b = bitcast i8* %a to %Foo*
+ %p0 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
+ %p1 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
+ store i32 13, i32* %p0
+ store i32 42, i32* %p1
+; CHECK-DAG: movl $13, ([[REGISTER]])
+; CHECK-DAG: movl $42, 4([[REGISTER]])
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_p
+ call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+ ret void
+}
+
+define void @preallocated_with_init() {
+; CHECK-LABEL: _preallocated_with_init:
+; CHECK: subl $8, %esp
+ %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+ %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+ %b = bitcast i8* %a to %Foo*
+; CHECK: pushl [[REGISTER]]
+; CHECK: calll _init
+ call void @init(%Foo* %b)
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_p
+ call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+ ret void
+}
+
+declare void @foo_p_p(%Foo* preallocated(%Foo), %Foo* preallocated(%Foo))
+
+define void @two_preallocated() {
+; CHECK-LABEL: _two_preallocated:
+ %t = call token @llvm.call.preallocated.setup(i32 2)
+ %a1 = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+ %b1 = bitcast i8* %a1 to %Foo*
+ %a2 = call i8* @llvm.call.preallocated.arg(token %t, i32 1) preallocated(%Foo)
+ %b2 = bitcast i8* %a2 to %Foo*
+; CHECK: subl $16, %esp
+; CHECK: calll _foo_p_p
+ call void @foo_p_p(%Foo* preallocated(%Foo) %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t)]
+ ret void
+}
+
+declare void @foo_p_int(%Foo* preallocated(%Foo), i32)
+
+define void @one_preallocated_one_normal() {
+; CHECK-LABEL: _one_preallocated_one_normal:
+; CHECK: subl $12, %esp
+ %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+ %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+ %b = bitcast i8* %a to %Foo*
+; CHECK: pushl [[REGISTER]]
+; CHECK: calll _init
+ call void @init(%Foo* %b)
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: movl $2, 8(%esp)
+; CHECK: calll _foo_p_int
+ call void @foo_p_int(%Foo* preallocated(%Foo) %b, i32 2) ["preallocated"(token %t)]
+ ret void
+}
+
+declare void @foo_ret_p(%Foo* sret, %Foo* preallocated(%Foo))
+
+define void @nested_with_init() {
+; CHECK-LABEL: _nested_with_init:
+ %tmp = alloca %Foo
+
+ %t1 = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: subl $12, %esp
+ %a1 = call i8* @llvm.call.preallocated.arg(token %t1, i32 0) preallocated(%Foo)
+ %b1 = bitcast i8* %a1 to %Foo*
+; CHECK: leal 4(%esp), [[REGISTER1:%[a-z]+]]
+
+ %t2 = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: subl $12, %esp
+ %a2 = call i8* @llvm.call.preallocated.arg(token %t2, i32 0) preallocated(%Foo)
+; CHECK: leal 4(%esp), [[REGISTER2:%[a-z]+]]
+ %b2 = bitcast i8* %a2 to %Foo*
+
+ call void @init(%Foo* %b2)
+; CHECK: pushl [[REGISTER2]]
+; CHECK: calll _init
+
+ call void @foo_ret_p(%Foo* %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t2)]
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_ret_p
+ call void @foo_ret_p(%Foo* %tmp, %Foo* preallocated(%Foo) %b1) ["preallocated"(token %t1)]
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_ret_p
+ ret void
+}
+
+declare void @foo_inreg_p(i32 inreg, %Foo* preallocated(%Foo))
+
+define void @inreg() {
+; CHECK-LABEL: _inreg:
+ %t = call token @llvm.call.preallocated.setup(i32 1)
+ %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+ %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: movl $9, %eax
+; CHECK: calll _foo_inreg_p
+ call void @foo_inreg_p(i32 9, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+ ret void
+}
+
+declare x86_thiscallcc void @foo_thiscall_p(i8*, %Foo* preallocated(%Foo))
+
+define void @thiscall() {
+; CHECK-LABEL: _thiscall:
+ %t = call token @llvm.call.preallocated.setup(i32 1)
+ %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+ %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: xorl %ecx, %ecx
+; CHECK: calll _foo_thiscall_p
+ call x86_thiscallcc void @foo_thiscall_p(i8* null, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+ ret void
+}
+
+declare x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo))
+declare x86_stdcallcc void @i(i32)
+
+define void @stdcall() {
+; CHECK-LABEL: _stdcall:
+ %t = call token @llvm.call.preallocated.setup(i32 1)
+ %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+ %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: calll _foo_stdcall_p@8
+ call x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+; CHECK-NOT: %esp
+; CHECK: pushl
+; CHECK: calll _i@4
+ call x86_stdcallcc void @i(i32 0)
+ ret void
+}
; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s
+; TODO: add preallocated versions of tests
+; we don't yet support preallocated calls that are only conditionally reached
+; after their setup
+
; chkstk cannot come before the usual prologue, since it adjusts ESP.
; If chkstk is used in the prologue, we also have to be careful about preserving
; EAX if it is used.
declare x86_stdcallcc void @tail_std(i32)
declare void @capture(i32*)
+define x86_thiscallcc void @preallocated(i32* %this, i32* preallocated(i32) %args) {
+entry:
+ %val = load i32, i32* %args
+ store i32 0, i32* %args
+ tail call x86_stdcallcc void @tail_std(i32 %val)
+ ret void
+}
+
+; CHECK-LABEL: _preallocated: # @preallocated
+; CHECK: movl 4(%esp), %[[reg:[^ ]*]]
+; CHECK: movl $0, 4(%esp)
+; CHECK: pushl %[[reg]]
+; CHECK: calll _tail_std@4
+; CHECK: retl $4
+
define x86_thiscallcc void @inalloca(i32* %this, i32* inalloca %args) {
entry:
%val = load i32, i32* %args
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
declare void @f(i32)
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
; Test1: Replace argument with constant
define internal void @test1(i32 %a) {
ret i32* %call
}
+define internal i32* @test_preallocated(i32* preallocated(i32) %a) {
+; CHECK-LABEL: define {{[^@]+}}@test_preallocated
+; CHECK-SAME: (i32* noalias nofree returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]])
+; CHECK-NEXT: ret i32* [[A]]
+;
+ ret i32* %a
+}
+define i32* @complicated_args_preallocated() {
+; CHECK-LABEL: define {{[^@]+}}@complicated_args_preallocated()
+; CHECK-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null)
+; CHECK-NEXT: ret i32* [[CALL]]
+;
+ %c = call token @llvm.call.preallocated.setup(i32 1)
+ %call = call i32* @test_preallocated(i32* preallocated(i32) null) ["preallocated"(token %c)]
+ ret i32* %call
+}
+
define internal void @test_sret(%struct.X* sret %a, %struct.X** %b) {
;
; CHECK-LABEL: define {{[^@]+}}@test_sret
; RUN: opt < %s -deadargelim -S | FileCheck %s
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
%Ty = type <{ i32, i32 }>
; Check if the pass doesn't modify anything that doesn't need changing. We feed
ret i32 %v
}
+; We can't remove 'this' here, as that would put argmem in ecx instead of
+; memory.
+define internal x86_thiscallcc i32 @unused_this_preallocated(i32* %this, i32* preallocated(i32) %argmem) {
+ %v = load i32, i32* %argmem
+ ret i32 %v
+}
+; CHECK-LABEL: define internal x86_thiscallcc i32 @unused_this_preallocated(i32* %this, i32* preallocated(i32) %argmem)
+
+define i32 @caller3() {
+ %t = alloca i32
+ %c = call token @llvm.call.preallocated.setup(i32 1)
+ %M = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
+ %m = bitcast i8* %M to i32*
+ store i32 42, i32* %m
+ %v = call x86_thiscallcc i32 @unused_this_preallocated(i32* %t, i32* preallocated(i32) %m) ["preallocated"(token %c)]
+ ret i32 %v
+}
+
; CHECK: attributes #0 = { nounwind }
ret void
}
+; Test for preallocated handling.
+define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
+; CHECK-LABEL: @test9_3(
+; CHECK-NEXT: ret void
+;
+ %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
+ store i32 1, i32* %tmp2, align 4
+ ret void
+}
+
; DSE should delete the dead trampoline.
declare void @test11f()
define void @test11() {
ret void
}
+; Test for preallocated handling.
+define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
+; CHECK-LABEL: @test9_3(
+; CHECK-NEXT: ret void
+;
+ %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
+ store i32 1, i32* %tmp2, align 4
+ ret void
+}
+
; va_arg has fuzzy dependence, the store shouldn't be zapped.
define double @test10(i8* %X) {
; CHECK-LABEL: @test10(
ret void
}
+; CHECK: define void @test7_2(i32* nocapture preallocated(i32) %a)
+; preallocated parameters are always considered written
+define void @test7_2(i32* preallocated(i32) %a) {
+ ret void
+}
+
; CHECK: define i32* @test8_1(i32* readnone returned %p)
define i32* @test8_1(i32* %p) {
entry:
; RUN: opt < %s -globalopt -S | FileCheck %s
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
define internal i32 @f(i32* %m) {
; CHECK-LABEL: define internal fastcc i32 @f
%v = load i32, i32* %m
ret i32 %rv
}
+define internal i32 @preallocated(i32* preallocated(i32) %p) {
+; TODO: handle preallocated:
+; CHECK-NOT-LABEL: define internal fastcc i32 @preallocated(i32* %p)
+ %rv = load i32, i32* %p
+ ret i32 %rv
+}
+
define void @call_things() {
%m = alloca i32
call i32 @f(i32* %m)
call i32 @j(i32* %m)
%args = alloca inalloca i32
call i32 @inalloca(i32* inalloca %args)
+ ; TODO: handle preallocated
+ ;%c = call token @llvm.call.preallocated.setup(i32 1)
+ ;%N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
+ ;%n = bitcast i8* %N to i32*
+ ;call i32 @preallocated(i32* preallocated(i32) %n) ["preallocated"(token %c)]
ret void
}
--- /dev/null
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-win32"
+
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+declare void @takes_i32(i32)
+declare void @takes_i32_preallocated(i32* preallocated(i32))
+
+define void @f() {
+; CHECK-LABEL: define void @f()
+ %t = call token @llvm.call.preallocated.setup(i32 1)
+ %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(i32)
+ %arg = bitcast i8* %a to i32*
+ call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* preallocated(i32) %arg) ["preallocated"(token %t)]
+; CHECK: call void bitcast{{.*}}@takes_i32
+ ret void
+}
+
+define void @g() {
+; CHECK-LABEL: define void @g()
+ call void bitcast (void (i32*)* @takes_i32_preallocated to void (i32)*)(i32 0)
+; CHECK: call void bitcast{{.*}}@takes_i32_preallocated
+ ret void
+}