[PowerPC] Add StackMap/PatchPoint support

author Hal Finkel <hfinkel@anl.gov>

Tue, 13 Jan 2015 17:48:12 +0000 (17:48 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Tue, 13 Jan 2015 17:48:12 +0000 (17:48 +0000)
author Hal Finkel <hfinkel@anl.gov>
Tue, 13 Jan 2015 17:48:12 +0000 (17:48 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Tue, 13 Jan 2015 17:48:12 +0000 (17:48 +0000)
diff --git a/llvm/docs/StackMaps.rst b/llvm/docs/StackMaps.rst

index bd0fb94..5bb0554 100644 (file)
--- a/llvm/docs/StackMaps.rst
+++ b/llvm/docs/StackMaps.rst
@@ -221,6 +221,12 @@ lowered according to the calling convention specified at the
  intrinsic's callsite. Variants of the intrinsic with non-void return
  type also return a value according to calling convention.
  
+On PowerPC, note that the ``<target>`` must be the actual intended target of
+the indirect call, not the function-descriptor address normally used as the
+C/C++ function-pointer representation. As a result, the call target must be
+local because no adjustment or restoration of the TOC pointer (in register r2)
+will be performed.
+
  Requesting zero patch point arguments is valid. In this case, all
  variable operands are handled just like
  ``llvm.experimental.stackmap.*``. The difference is that space will
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp

index 5648873..decc1b2 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -34,6 +34,7 @@
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineModuleInfoImpls.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
  #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DebugInfo.h"
@@ -69,10 +70,11 @@ namespace {
      MapVector<MCSymbol*, MCSymbol*> TOC;
      const PPCSubtarget &Subtarget;
      uint64_t TOCLabelID;
+    StackMaps SM;
    public:
      explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
        : AsmPrinter(TM, Streamer),
-        Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
+        Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0), SM(*this) {}
  
      const char *getPassName() const override {
        return "PowerPC Assembly Printer";
@@ -90,6 +92,13 @@ namespace {
      bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                                 unsigned AsmVariant, const char *ExtraCode,
                                 raw_ostream &O) override;
+
+    void EmitEndOfAsmFile(Module &M) override;
+
+    void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
+                       const MachineInstr &MI);
+    void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+                         const MachineInstr &MI);
    };
  
    /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
@@ -316,6 +325,80 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
    return TOCEntry;
  }
  
+void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
+  SM.serializeToStackMapSection();
+}
+
+void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
+                                  const MachineInstr &MI) {
+  unsigned NumNOPBytes = MI.getOperand(1).getImm();
+
+  SM.recordStackMap(MI);
+  assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+
+  // Scan ahead to trim the shadow.
+  const MachineBasicBlock &MBB = *MI.getParent();
+  MachineBasicBlock::const_iterator MII(MI);
+  ++MII;
+  while (NumNOPBytes > 0) {
+    if (MII == MBB.end() || MII->isCall() ||
+        MII->getOpcode() == PPC::DBG_VALUE ||
+        MII->getOpcode() == TargetOpcode::PATCHPOINT ||
+        MII->getOpcode() == TargetOpcode::STACKMAP)
+      break;
+    ++MII;
+    NumNOPBytes -= 4;
+  }
+
+  // Emit nops.
+  for (unsigned i = 0; i < NumNOPBytes; i += 4)
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
+}
+
+// Lower a patchpoint of the form:
+// [<def>], <id>, <numBytes>, <target>, <numArgs>
+void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+                                    const MachineInstr &MI) {
+  SM.recordPatchPoint(MI);
+  PatchPointOpers Opers(&MI);
+
+  int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
+  unsigned EncodedBytes = 0;
+  if (CallTarget) {
+    assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
+           "High 16 bits of call target should be zero.");
+    unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
+    EncodedBytes = 6*4;
+    // Materialize the jump address:
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8)
+                                    .addReg(ScratchReg)
+                                    .addImm((CallTarget >> 32) & 0xFFFF));
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC)
+                                    .addReg(ScratchReg)
+                                    .addReg(ScratchReg)
+                                    .addImm(32).addImm(16));
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8)
+                                    .addReg(ScratchReg)
+                                    .addReg(ScratchReg)
+                                    .addImm((CallTarget >> 16) & 0xFFFF));
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8)
+                                    .addReg(ScratchReg)
+                                    .addReg(ScratchReg)
+                                    .addImm(CallTarget & 0xFFFF));
+
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg));
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8));
+  }
+
+  // Emit padding.
+  unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+  assert(NumBytes >= EncodedBytes &&
+         "Patchpoint can't request size less than the length of a call.");
+  assert((NumBytes - EncodedBytes) % 4 == 0 &&
+         "Invalid number of NOP bytes requested!");
+  for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
+}
  
  /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
  /// the current output stream.
@@ -332,6 +415,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    default: break;
    case TargetOpcode::DBG_VALUE:
      llvm_unreachable("Should be handled target independently");
+  case TargetOpcode::STACKMAP:
+    return LowerSTACKMAP(OutStreamer, SM, *MI);
+  case TargetOpcode::PATCHPOINT:
+    return LowerPATCHPOINT(OutStreamer, SM, *MI);
+
    case PPC::MoveGOTtoLR: {
      // Transform %LR = MoveGOTtoLR
      // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.h b/llvm/lib/Target/PowerPC/PPCCallingConv.h

new file mode 100644 (file)

index 0000000..eb904a8
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.h
@@ -0,0 +1,35 @@
+//=== PPCCallingConv.h - PPC Custom Calling Convention Routines -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the PPC Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_PPC_PPCCALLINGCONV_H
+#define LLVM_LIB_TARGET_PPC_PPCCALLINGCONV_H
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
+                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+                                CCState &) {
+  llvm_unreachable("The AnyReg calling convention is only supported by the " \
+                   "stackmap and patchpoint intrinsics.");
+  // gracefully fallback to PPC C calling convention on Release builds.
+  return false;
+}
+
+} // End llvm namespace
+
+#endif
+
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td

index cf8fee4..56176f1 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -28,8 +28,21 @@ class CCIfNotSubtarget<string F, CCAction A>
  // Return Value Calling Convention
  //===----------------------------------------------------------------------===//
  
+// PPC64 AnyReg return-value convention. No explicit register is specified for
+// the return-value. The register allocator is allowed and expected to choose
+// any free register.
+//
+// This calling convention is currently only supported by the stackmap and
+// patchpoint intrinsics. All other uses will result in an assert on Debug
+// builds. On Release builds we fallback to the PPC C calling convention.
+def RetCC_PPC64_AnyReg : CallingConv<[
+  CCCustom<"CC_PPC_AnyReg_Error">
+]>;
+
  // Return-value convention for PowerPC
  def RetCC_PPC : CallingConv<[
+  CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
+
    // On PPC64, integer return values are always promoted to i64
    CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>,
    CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>,
@@ -51,6 +64,15 @@ def RetCC_PPC : CallingConv<[
             CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
  ]>;
  
+// No explicit register is specified for the AnyReg calling convention. The
+// register allocator may assign the arguments to any free register.
+//
+// This calling convention is currently only supported by the stackmap and
+// patchpoint intrinsics. All other uses will result in an assert on Debug
+// builds. On Release builds we fallback to the PPC C calling convention.
+def CC_PPC64_AnyReg : CallingConv<[
+  CCCustom<"CC_PPC_AnyReg_Error">
+]>;
  
  // Note that we don't currently have calling conventions for 64-bit
  // PowerPC, but handle all the complexities of the ABI in the lowering
@@ -61,6 +83,8 @@ def RetCC_PPC : CallingConv<[
  // Only handle ints and floats.  All ints are promoted to i64.
  // Vector types and quadword ints are not handled.
  def CC_PPC64_ELF_FIS : CallingConv<[
+  CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>,
+
    CCIfType<[i1],  CCPromoteToType<i64>>,
    CCIfType<[i8],  CCPromoteToType<i64>>,
    CCIfType<[i16], CCPromoteToType<i64>>,
@@ -74,6 +98,8 @@ def CC_PPC64_ELF_FIS : CallingConv<[
  // and multiple register returns are "supported" to avoid compile
  // errors, but none are handled by the fast selector.
  def RetCC_PPC64_ELF_FIS : CallingConv<[
+  CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
+
    CCIfType<[i1],   CCPromoteToType<i64>>,
    CCIfType<[i8],   CCPromoteToType<i64>>,
    CCIfType<[i16],  CCPromoteToType<i64>>,
@@ -203,3 +229,15 @@ def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
  
  def CSR_NoRegs : CalleeSavedRegs<(add)>;
  
+def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10),
+                                             (sequence "X%u", 14, 31),
+                                             (sequence "F%u", 0, 31),
+                                             (sequence "CR%u", 0, 7))>;
+
+def CSR_64_AllRegs_Altivec : CalleeSavedRegs<(add CSR_64_AllRegs,
+                                             (sequence "V%u", 0, 31))>;
+
+def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
+                                         (sequence "VSL%u", 0, 31),
+                                         (sequence "VSH%u", 0, 31))>;
+
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp

index ddf13ff..6c0e376 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -15,6 +15,7 @@
  
  #include "PPC.h"
  #include "MCTargetDesc/PPCPredicates.h"
+#include "PPCCallingConv.h"
  #include "PPCISelLowering.h"
  #include "PPCSubtarget.h"
  #include "PPCTargetMachine.h"
@@ -119,6 +120,8 @@ class PPCFastISel final : public FastISel {
                               unsigned Op0, bool Op0IsKill,
                               unsigned Op1, bool Op1IsKill);
  
+    bool fastLowerCall(CallLoweringInfo &CLI);
+
    // Instruction selection routines.
    private:
      bool SelectLoad(const Instruction *I);
@@ -130,7 +133,6 @@ class PPCFastISel final : public FastISel {
      bool SelectIToFP(const Instruction *I, bool IsSigned);
      bool SelectFPToI(const Instruction *I, bool IsSigned);
      bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
-    bool SelectCall(const Instruction *I);
      bool SelectRet(const Instruction *I);
      bool SelectTrunc(const Instruction *I);
      bool SelectIntExt(const Instruction *I);
@@ -174,9 +176,7 @@ class PPCFastISel final : public FastISel {
                           CallingConv::ID CC,
                           unsigned &NumBytes,
                           bool IsVarArg);
-    void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
-                    const Instruction *I, CallingConv::ID CC,
-                    unsigned &NumBytes, bool IsVarArg);
+    bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
      CCAssignFn *usePPC32CCs(unsigned Flag);
  
    private:
@@ -1339,9 +1339,9 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
  
  // For a call that we've determined we can fast-select, finish the
  // call sequence and generate a copy to obtain the return value (if any).
-void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
-                             const Instruction *I, CallingConv::ID CC,
-                             unsigned &NumBytes, bool IsVarArg) {
+bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
+  CallingConv::ID CC = CLI.CallConv;
+
    // Issue CallSEQ_END.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TII.getCallFrameDestroyOpcode()))
@@ -1352,7 +1352,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
    // any real difficulties there.
    if (RetVT != MVT::isVoid) {
      SmallVector<CCValAssign, 16> RVLocs;
-    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
+    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
      CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
      CCValAssign &VA = RVLocs[0];
      assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
@@ -1397,39 +1397,35 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
      }
  
      assert(ResultReg && "ResultReg unset!");
-    UsedRegs.push_back(SourcePhysReg);
-    updateValueMap(I, ResultReg);
+    CLI.InRegs.push_back(SourcePhysReg);
+    CLI.ResultReg = ResultReg;
+    CLI.NumResultRegs = 1;
    }
+
+  return true;
  }
  
-// Attempt to fast-select a call instruction.
-bool PPCFastISel::SelectCall(const Instruction *I) {
-  const CallInst *CI = cast<CallInst>(I);
-  const Value *Callee = CI->getCalledValue();
+bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
+  CallingConv::ID CC  = CLI.CallConv;
+  bool IsTailCall     = CLI.IsTailCall;
+  bool IsVarArg       = CLI.IsVarArg;
+  const Value *Callee = CLI.Callee;
+  const char *SymName = CLI.SymName;
  
-  // Can't handle inline asm.
-  if (isa<InlineAsm>(Callee))
+  if (!Callee && !SymName)
      return false;
  
    // Allow SelectionDAG isel to handle tail calls.
-  if (CI->isTailCall())
+  if (IsTailCall)
      return false;
  
-  // Obtain calling convention.
-  ImmutableCallSite CS(CI);
-  CallingConv::ID CC = CS.getCallingConv();
-
-  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
-  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
-  bool IsVarArg = FTy->isVarArg();
-
-  // Not ready for varargs yet.
+  // Let SDISel handle vararg functions.
    if (IsVarArg)
      return false;
  
    // Handle simple calls for now, with legal return types and
    // those that can be extended.
-  Type *RetTy = I->getType();
+  Type *RetTy = CLI.RetTy;
    MVT RetVT;
    if (RetTy->isVoidTy())
      RetVT = MVT::isVoid;
@@ -1450,7 +1446,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
  
    // Bail early if more than 8 arguments, as we only currently
    // handle arguments passed in registers.
-  unsigned NumArgs = CS.arg_size();
+  unsigned NumArgs = CLI.OutVals.size();
    if (NumArgs > 8)
      return false;
  
@@ -1465,28 +1461,16 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
    ArgVTs.reserve(NumArgs);
    ArgFlags.reserve(NumArgs);
  
-  for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end();
-       II != IE; ++II) {
-    // FIXME: ARM does something for intrinsic calls here, check into that.
-
-    unsigned AttrIdx = II - CS.arg_begin() + 1;
-    
+  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
      // Only handle easy calls for now.  It would be reasonably easy
      // to handle <= 8-byte structures passed ByVal in registers, but we
      // have to ensure they are right-justified in the register.
-    if (CS.paramHasAttr(AttrIdx, Attribute::InReg) ||
-        CS.paramHasAttr(AttrIdx, Attribute::StructRet) ||
-        CS.paramHasAttr(AttrIdx, Attribute::Nest) ||
-        CS.paramHasAttr(AttrIdx, Attribute::ByVal))
+    ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
+    if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
        return false;
  
-    ISD::ArgFlagsTy Flags;
-    if (CS.paramHasAttr(AttrIdx, Attribute::SExt))
-      Flags.setSExt();
-    if (CS.paramHasAttr(AttrIdx, Attribute::ZExt))
-      Flags.setZExt();
-
-    Type *ArgTy = (*II)->getType();
+    Value *ArgValue = CLI.OutVals[i];
+    Type *ArgTy = ArgValue->getType();
      MVT ArgVT;
      if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
        return false;
@@ -1494,14 +1478,11 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
      if (ArgVT.isVector())
        return false;
  
-    unsigned Arg = getRegForValue(*II);
+    unsigned Arg = getRegForValue(ArgValue);
      if (Arg == 0)
        return false;
  
-    unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
-    Flags.setOrigAlign(OriginalAlignment);
-
-    Args.push_back(*II);
+    Args.push_back(ArgValue);
      ArgRegs.push_back(Arg);
      ArgVTs.push_back(ArgVT);
      ArgFlags.push_back(Flags);
@@ -1515,18 +1496,28 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
                         RegArgs, CC, NumBytes, IsVarArg))
      return false;
  
+  MachineInstrBuilder MIB;
    // FIXME: No handling for function pointers yet.  This requires
    // implementing the function descriptor (OPD) setup.
    const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
-  if (!GV)
-    return false;
-
-  // Build direct call with NOP for TOC restore.
-  // FIXME: We can and should optimize away the NOP for local calls.
-  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                                    TII.get(PPC::BL8_NOP));
-  // Add callee.
-  MIB.addGlobalAddress(GV);
+  if (!GV) {
+    // patchpoints are a special case; they always dispatch to a pointer value.
+    // However, we don't actually want to generate the indirect call sequence
+    // here (that will be generated, as necessary, during asm printing), and
+    // the call we generate here will be erased by FastISel::selectPatchpoint,
+    // so don't try very hard...
+    if (CLI.IsPatchPoint)
+      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
+    else
+      return false;
+  } else {
+    // Build direct call with NOP for TOC restore.
+    // FIXME: We can and should optimize away the NOP for local calls.
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                  TII.get(PPC::BL8_NOP));
+    // Add callee.
+    MIB.addGlobalAddress(GV);
+  }
  
    // Add implicit physical register uses to the call.
    for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
@@ -1540,14 +1531,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
    // defs for return values will be added by setPhysRegsDeadExcept().
    MIB.addRegMask(TRI.getCallPreservedMask(CC));
  
-  // Finish off the call including any return values.
-  SmallVector<unsigned, 4> UsedRegs;
-  finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg);
-
-  // Set all unused physregs defs as dead.
-  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+  CLI.Call = MIB;
  
-  return true;
+  // Finish off the call including any return values.
+  return finishCall(RetVT, CLI, NumBytes);
  }
  
  // Attempt to fast-select a return instruction.
@@ -1837,9 +1824,7 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
      case Instruction::Sub:
        return SelectBinaryIntOp(I, ISD::SUB);
      case Instruction::Call:
-      if (dyn_cast<IntrinsicInst>(I))
-        return false;
-      return SelectCall(I);
+      return selectCall(I);
      case Instruction::Ret:
        return SelectRet(I);
      case Instruction::Trunc:
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp

index e39f755..1b85047 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -450,6 +450,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
  
    return MF.getTarget().Options.DisableFramePointerElim(MF) ||
      MFI->hasVarSizedObjects() ||
+    MFI->hasStackMap() || MFI->hasPatchPoint() ||
      (MF.getTarget().Options.GuaranteedTailCallOpt &&
       MF.getInfo<PPCFunctionInfo>()->hasFastCall());
  }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

index 2157003..120e4ed 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -13,6 +13,7 @@
  
  #include "PPCISelLowering.h"
  #include "MCTargetDesc/PPCPredicates.h"
+#include "PPCCallingConv.h"
  #include "PPCMachineFunctionInfo.h"
  #include "PPCPerfectShuffle.h"
  #include "PPCTargetMachine.h"
@@ -3590,6 +3591,7 @@ static bool isFunctionGlobalAddress(SDValue Callee) {
  static
  unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
                       SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
+                     bool IsPatchPoint,
                       SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
                       SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
                       const PPCSubtarget &Subtarget) {
@@ -3663,7 +3665,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
      // to do the call, we can't use PPCISD::CALL.
      SDValue MTCTROps[] = {Chain, Callee, InFlag};
  
-    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
+    if (isSVR4ABI && isPPC64 && !isELFv2ABI && !IsPatchPoint) {
        // Function pointers in the 64-bit SVR4 ABI do not point to the function
        // entry point, but to the function descriptor (the function entry point
        // address is part of the function descriptor though).
@@ -3732,9 +3734,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
        MTCTROps[2] = InFlag;
      }
  
-    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
-                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
-    InFlag = Chain.getValue(1);
+    if (!IsPatchPoint) {
+      Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
+                          makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
+      InFlag = Chain.getValue(1);
+    }
  
      NodeTys.clear();
      NodeTys.push_back(MVT::Other);
@@ -3743,7 +3747,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
      CallOpc = PPCISD::BCTRL;
      Callee.setNode(nullptr);
      // Add use of X11 (holding environment pointer)
-    if (isSVR4ABI && isPPC64 && !isELFv2ABI)
+    if (isSVR4ABI && isPPC64 && !isELFv2ABI && !IsPatchPoint)
        Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
      // Add CTR register as callee so a bctr can be emitted later.
      if (isTailCall)
@@ -3783,7 +3787,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
                                    RegsToPass[i].second.getValueType()));
  
    // Direct calls in the ELFv2 ABI need the TOC register live into the call.
-  if (Callee.getNode() && isELFv2ABI)
+  if (Callee.getNode() && isELFv2ABI && !IsPatchPoint)
      Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
  
    return CallOpc;
@@ -3846,7 +3850,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
  
  SDValue
  PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
-                              bool isTailCall, bool isVarArg,
+                              bool isTailCall, bool isVarArg, bool IsPatchPoint,
                                SelectionDAG &DAG,
                                SmallVector<std::pair<unsigned, SDValue>, 8>
                                  &RegsToPass,
@@ -3860,8 +3864,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
    std::vector<EVT> NodeTys;
    SmallVector<SDValue, 8> Ops;
    unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
-                                 isTailCall, RegsToPass, Ops, NodeTys,
-                                 Subtarget);
+                                 isTailCall, IsPatchPoint, RegsToPass, Ops,
+                                 NodeTys, Subtarget);
  
    // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
    if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
@@ -3963,6 +3967,7 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
    bool &isTailCall                      = CLI.IsTailCall;
    CallingConv::ID CallConv              = CLI.CallConv;
    bool isVarArg                         = CLI.IsVarArg;
+  bool IsPatchPoint                     = CLI.IsPatchPoint;
  
    if (isTailCall)
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
@@ -3975,23 +3980,23 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
    if (Subtarget.isSVR4ABI()) {
      if (Subtarget.isPPC64())
        return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
-                              isTailCall, Outs, OutVals, Ins,
+                              isTailCall, IsPatchPoint, Outs, OutVals, Ins,
                                dl, DAG, InVals);
      else
        return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
-                              isTailCall, Outs, OutVals, Ins,
+                              isTailCall, IsPatchPoint, Outs, OutVals, Ins,
                                dl, DAG, InVals);
    }
  
    return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
-                          isTailCall, Outs, OutVals, Ins,
+                          isTailCall, IsPatchPoint, Outs, OutVals, Ins,
                            dl, DAG, InVals);
  }
  
  SDValue
  PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                                      CallingConv::ID CallConv, bool isVarArg,
-                                    bool isTailCall,
+                                    bool isTailCall, bool IsPatchPoint,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<SDValue> &OutVals,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -4201,7 +4206,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
      PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
                      false, TailCallArguments);
  
-  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+  return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
                      RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
                      Ins, InVals);
  }
@@ -4229,7 +4234,7 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
  SDValue
  PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                      CallingConv::ID CallConv, bool isVarArg,
-                                    bool isTailCall,
+                                    bool isTailCall, bool IsPatchPoint,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<SDValue> &OutVals,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -4665,7 +4670,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
    // Check if this is an indirect call (MTCTR/BCTRL).
    // See PrepareCall() for more information about calls through function
    // pointers in the 64-bit SVR4 ABI.
-  if (!isTailCall &&
+  if (!isTailCall && !IsPatchPoint &&
        !isFunctionGlobalAddress(Callee) &&
        !isa<ExternalSymbolSDNode>(Callee)) {
      // Load r2 into a virtual register and store it to the TOC save area.
@@ -4679,7 +4684,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
      // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
      // This does not mean the MTCTR instruction must use R12; it's easier
      // to model this as an extra parameter, so do that.
-    if (isELFv2ABI)
+    if (isELFv2ABI && !IsPatchPoint)
        RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
    }
  
@@ -4696,7 +4701,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
      PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
                      FPOp, true, TailCallArguments);
  
-  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+  return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
                      RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
                      Ins, InVals);
  }
@@ -4704,7 +4709,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
  SDValue
  PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
                                      CallingConv::ID CallConv, bool isVarArg,
-                                    bool isTailCall,
+                                    bool isTailCall, bool IsPatchPoint,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<SDValue> &OutVals,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -5089,7 +5094,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
      PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
                      FPOp, true, TailCallArguments);
  
-  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+  return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
                      RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
                      Ins, InVals);
  }
@@ -7246,6 +7251,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
  MachineBasicBlock *
  PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                 MachineBasicBlock *BB) const {
+  if (MI->getOpcode() == TargetOpcode::STACKMAP ||
+      MI->getOpcode() == TargetOpcode::PATCHPOINT)
+    return emitPatchPoint(MI, BB);
+
    if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
        MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
      return emitEHSjLjSetJmp(MI, BB);
@@ -9882,6 +9891,19 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
    return false;
  }
  
+const MCPhysReg *
+PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
+  // LR is a callee-save register, but we must treat it as clobbered by any call
+  // site. Hence we include LR in the scratch registers, which are in turn added
+  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
+  // to CTR, which is used by any indirect call.
+  static const MCPhysReg ScratchRegs[] = {
+    PPC::X11, PPC::X12, PPC::LR8, PPC::CTR8, 0
+  };
+
+  return ScratchRegs;
+}
+
  bool
  PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
                       EVT VT , unsigned DefinedValues) const {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h

index b171b16..1b585d8 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -570,6 +570,8 @@ namespace llvm {
      /// expanded to fmul + fadd.
      bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
  
+    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
+
      // Should we expand the build vector with shuffles?
      bool
      shouldExpandBuildVectorWithShuffles(EVT VT,
@@ -681,7 +683,7 @@ namespace llvm {
                              SDLoc dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue> &InVals) const;
      SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall,
-                       bool isVarArg,
+                       bool isVarArg, bool IsPatchPoint,
                         SelectionDAG &DAG,
                         SmallVector<std::pair<unsigned, SDValue>, 8>
                           &RegsToPass,
@@ -746,7 +748,7 @@ namespace llvm {
      SDValue
        LowerCall_Darwin(SDValue Chain, SDValue Callee,
                         CallingConv::ID CallConv,
-                       bool isVarArg, bool isTailCall,
+                       bool isVarArg, bool isTailCall, bool IsPatchPoint,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         const SmallVectorImpl<SDValue> &OutVals,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -755,7 +757,7 @@ namespace llvm {
      SDValue
        LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                         CallingConv::ID CallConv,
-                       bool isVarArg, bool isTailCall,
+                       bool isVarArg, bool isTailCall, bool IsPatchPoint,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         const SmallVectorImpl<SDValue> &OutVals,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -763,7 +765,7 @@ namespace llvm {
                         SmallVectorImpl<SDValue> &InVals) const;
      SDValue
      LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
-                     bool isVarArg, bool isTailCall,
+                     bool isVarArg, bool isTailCall, bool IsPatchPoint,
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
                       const SmallVectorImpl<SDValue> &OutVals,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

index 1242c3e..2492229 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -29,6 +29,7 @@
  #include "llvm/CodeGen/PseudoSourceValue.h"
  #include "llvm/CodeGen/ScheduleDAG.h"
  #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackMaps.h"
  #include "llvm/MC/MCAsmInfo.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
@@ -1596,6 +1597,11 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
      const MachineFunction *MF = MI->getParent()->getParent();
      const char *AsmStr = MI->getOperand(0).getSymbolName();
      return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+  } else if (Opcode == TargetOpcode::STACKMAP) {
+    return MI->getOperand(1).getImm();
+  } else if (Opcode == TargetOpcode::PATCHPOINT) {
+    PatchPointOpers Opers(MI);
+    return Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
    } else {
      const MCInstrDesc &Desc = get(Opcode);
      return Desc.getSize();
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp

index 9b9966f..aa0da7a 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -99,6 +99,14 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
  
  const MCPhysReg*
  PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) {
+    if (Subtarget.hasVSX())
+      return CSR_64_AllRegs_VSX_SaveList;
+    if (Subtarget.hasAltivec())
+      return CSR_64_AllRegs_Altivec_SaveList;
+    return CSR_64_AllRegs_SaveList;
+  }
+
    if (Subtarget.isDarwinABI())
      return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
                                    CSR_Darwin64_Altivec_SaveList :
@@ -117,6 +125,14 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  
  const uint32_t*
  PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+  if (CC == CallingConv::AnyReg) {
+    if (Subtarget.hasVSX())
+      return CSR_64_AllRegs_VSX_RegMask;
+    if (Subtarget.hasAltivec())
+      return CSR_64_AllRegs_Altivec_RegMask;
+    return CSR_64_AllRegs_RegMask;
+  }
+
    if (Subtarget.isDarwinABI())
      return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
                                    CSR_Darwin64_Altivec_RegMask :
@@ -138,6 +154,14 @@ PPCRegisterInfo::getNoPreservedMask() const {
    return CSR_NoRegs_RegMask;
  }
  
+void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
+  unsigned PseudoRegs[] = { PPC::ZERO, PPC::ZERO8, PPC::RM };
+  for (unsigned i = 0, ie = array_lengthof(PseudoRegs); i != ie; ++i) {
+    unsigned Reg = PseudoRegs[i];
+    Mask[Reg / 32] &= ~(1u << (Reg % 32));
+  }
+}
+
  BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
    BitVector Reserved(getNumRegs());
    const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
@@ -700,7 +724,10 @@ static unsigned getOffsetONFromFION(const MachineInstr &MI,
    // Take into account whether it's an add or mem instruction
    unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
    if (MI.isInlineAsm())
-    OffsetOperandNo = FIOperandNum-1;
+    OffsetOperandNo = FIOperandNum - 1;
+  else if (MI.getOpcode() == TargetOpcode::STACKMAP ||
+           MI.getOpcode() == TargetOpcode::PATCHPOINT)
+    OffsetOperandNo = FIOperandNum + 1;
  
    return OffsetOperandNo;
  }
@@ -772,7 +799,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
  
    // If the instruction is not present in ImmToIdxMap, then it has no immediate
    // form (and must be r+r).
-  bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC);
+  bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP &&
+                   OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC);
  
    // Now add the frame object offset to the offset from r1.
    int Offset = MFI->getObjectOffset(FrameIndex);
@@ -796,8 +824,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
    // only "std" to a stack slot that is at least 4-byte aligned, but it can
    // happen in invalid code.
    assert(OpC != PPC::DBG_VALUE &&
-         "This should be handle in a target independent way");
-  if (!noImmForm && isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
+         "This should be handled in a target-independent way");
+  if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) ||
+                     OpC == TargetOpcode::STACKMAP ||
+                     OpC == TargetOpcode::PATCHPOINT)) {
      MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
      return;
    }
@@ -1008,6 +1038,8 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
    Offset += MI->getOperand(OffsetOperandNo).getImm();
  
    return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
+         MI->getOpcode() == TargetOpcode::STACKMAP ||
+         MI->getOpcode() == TargetOpcode::PATCHPOINT ||
           (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
  }
  
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h

index c182f95..4c2ef90 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -49,6 +49,8 @@ public:
    const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
    const uint32_t *getNoPreservedMask() const;
  
+  void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
+
    BitVector getReservedRegs(const MachineFunction &MF) const override;
  
    /// We require the register scavenger.
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

index fc440a5..a7bb252 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -183,6 +183,15 @@ unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
      if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
        return TCC_Free;
      break;
+  case Intrinsic::experimental_stackmap:
+    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+      return TCC_Free;
+    break;
+  case Intrinsic::experimental_patchpoint_void:
+  case Intrinsic::experimental_patchpoint_i64:
+    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+      return TCC_Free;
+    break;
    }
    return PPCTTI::getIntImmCost(Imm, Ty);
  }
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll b/llvm/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll

new file mode 100644 (file)

index 0000000..479c7a7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll
@@ -0,0 +1,19 @@
+; RUN: not llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
+;
+; Check that misuse of anyregcc results in a compile time error.
+
+; CHECK: LLVM ERROR: ran out of registers during register allocation
+define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
+                        i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16,
+                        i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
+                        i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32) {
+entry:
+  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32,
+                i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
+                i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16,
+                i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
+                i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32)
+  ret i64 %result
+}
+
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-anyregcc.ll b/llvm/test/CodeGen/PowerPC/ppc64-anyregcc.ll

new file mode 100644 (file)

index 0000000..7cd3c4b
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc64-anyregcc.ll
@@ -0,0 +1,367 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Stackmap Header: no constants - 6 callsites
+; CHECK-LABEL: .section        .llvm_stackmaps
+; CHECK-NEXT:  __LLVM_StackMaps:
+; Header
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 0
+; Num Functions
+; CHECK-NEXT:   .long 8
+; Num LargeConstants
+; CHECK-NEXT:   .long 0
+; Num Callsites
+; CHECK-NEXT:   .long 8
+
+; Functions and stack size
+; CHECK-NEXT:   .quad test
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad property_access1
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad property_access2
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad property_access3
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad anyreg_test1
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad anyreg_test2
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad patchpoint_spilldef
+; CHECK-NEXT:   .quad 256
+; CHECK-NEXT:   .quad patchpoint_spillargs
+; CHECK-NEXT:   .quad 288
+
+
+; test
+; CHECK-LABEL:  .long   .L{{.*}}-.L.test
+; CHECK-NEXT:   .short  0
+; 3 locations
+; CHECK-NEXT:   .short  3
+; Loc 0: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 2: Constant 3
+; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long 3
+define i64 @test() nounwind ssp uwtable {
+entry:
+  call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 24, i8* null, i32 2, i32 1, i32 2, i64 3)
+  ret i64 0
+}
+
+; property access 1 - %obj is an anyreg call argument and should therefore be in a register
+; CHECK-LABEL:  .long   .L{{.*}}-.L.property_access1
+; CHECK-NEXT:   .short  0
+; 2 locations
+; CHECK-NEXT:   .short  2
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
+entry:
+  %f = inttoptr i64 281474417671919 to i8*
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 24, i8* %f, i32 1, i8* %obj)
+  ret i64 %ret
+}
+
+; property access 2 - %obj is an anyreg call argument and should therefore be in a register
+; CHECK-LABEL:  .long   .L{{.*}}-.L.property_access2
+; CHECK-NEXT:   .short  0
+; 2 locations
+; CHECK-NEXT:   .short  2
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @property_access2() nounwind ssp uwtable {
+entry:
+  %obj = alloca i64, align 8
+  %f = inttoptr i64 281474417671919 to i8*
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %f, i32 1, i64* %obj)
+  ret i64 %ret
+}
+
+; property access 3 - %obj is a frame index
+; CHECK-LABEL:  .long   .L{{.*}}-.L.property_access3
+; CHECK-NEXT:   .short  0
+; 2 locations
+; CHECK-NEXT:   .short  2
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Direct FP - 8
+; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short 31
+; CHECK-NEXT:   .long 112
+define i64 @property_access3() nounwind ssp uwtable {
+entry:
+  %obj = alloca i64, align 8
+  %f = inttoptr i64 281474417671919 to i8*
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 24, i8* %f, i32 0, i64* %obj)
+  ret i64 %ret
+}
+
+; anyreg_test1
+; CHECK-LABEL:  .long   .L{{.*}}-.L.anyreg_test1
+; CHECK-NEXT:   .short  0
+; 14 locations
+; CHECK-NEXT:   .short  14
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 2: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 3: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 4: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 5: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 6: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 7: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 8: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 9: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 10: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 11: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 12: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 13: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
+entry:
+  %f = inttoptr i64 281474417671919 to i8*
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 24, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+  ret i64 %ret
+}
+
+; anyreg_test2
+; CHECK-LABEL:  .long   .L{{.*}}-.L.anyreg_test2
+; CHECK-NEXT:   .short  0
+; 14 locations
+; CHECK-NEXT:   .short  14
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 2: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 3: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 4: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 5: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 6: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 7: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 8: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 9: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 10: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 11: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 12: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 13: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
+entry:
+  %f = inttoptr i64 281474417671919 to i8*
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+  ret i64 %ret
+}
+
+; Test spilling the return value of an anyregcc call.
+;
+; <rdar://problem/15432754> [JS] Assertion: "Folded a def to a non-store!"
+;
+; CHECK-LABEL: .long .L{{.*}}-.L.patchpoint_spilldef
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .short 3
+; Loc 0: Register (some register that will be spilled to the stack)
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 1: Register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 1: Register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
+  tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17
+},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
+  ret i64 %result
+}
+
+; Test spilling the arguments of an anyregcc call.
+;
+; <rdar://problem/15487687> [JS] AnyRegCC argument ends up being spilled
+;
+; CHECK-LABEL: .long .L{{.*}}-.L.patchpoint_spillargs
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .short 5
+; Loc 0: Return a register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 1: Arg0 in a Register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 2: Arg1 in a Register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 3: Arg2 spilled to FP -96
+; CHECK-NEXT: .byte  3
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short 31
+; CHECK-NEXT: .long 128
+; Loc 4: Arg3 spilled to FP - 88
+; CHECK-NEXT: .byte  3
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short 31
+; CHECK-NEXT: .long 136
+define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+  tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17
+},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
+  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+  ret i64 %result
+}
+
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-patchpoint.ll b/llvm/test/CodeGen/PowerPC/ppc64-patchpoint.ll

new file mode 100644 (file)

index 0000000..5e58fda
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc64-patchpoint.ll
@@ -0,0 +1,93 @@
+; RUN: llc                             < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Trivial patchpoint codegen
+;
+define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: trivial_patchpoint_codegen:
+
+; CHECK: li 11, -8531
+; CHECK-NEXT: rldic 11, 11, 32, 16
+; CHECK-NEXT: oris 11, 11, 48879
+; CHECK-NEXT: ori 11, 11, 51966
+; CHECK-NEXT: mtctr 11
+; CHECK-NEXT: bctrl
+
+; CHECK: li 11, -8531
+; CHECK-NEXT: rldic 11, 11, 32, 16
+; CHECK-NEXT: oris 11, 11, 48879
+; CHECK-NEXT: ori 11, 11, 51967
+; CHECK-NEXT: mtctr 11
+; CHECK-NEXT: bctrl
+
+; CHECK: blr
+
+  %resolveCall2 = inttoptr i64 244837814094590 to i8*
+  %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+  %resolveCall3 = inttoptr i64 244837814094591 to i8*
+  tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 24, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
+  ret i64 %result
+}
+
+; Caller frame metadata with stackmaps. This should not be optimized
+; as a leaf function.
+;
+; CHECK-LABEL: caller_meta_leaf
+; CHECK: stdu 1, -80(1)
+; CHECK: Ltmp
+; CHECK: addi 1, 1, 80
+; CHECK: blr
+
+define void @caller_meta_leaf() {
+entry:
+  %metadata = alloca i64, i32 3, align 8
+  store i64 11, i64* %metadata
+  store i64 12, i64* %metadata
+  store i64 13, i64* %metadata
+  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+  ret void
+}
+
+; Test patchpoints reusing the same TargetConstant.
+; <rdar:15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4)
+; There is no way to verify this, since it depends on memory allocation.
+; But I think it's useful to include as a working example.
+define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64 %tmp79) {
+entry:
+  %tmp80 = add i64 %tmp79, -16
+  %tmp81 = inttoptr i64 %tmp80 to i64*
+  %tmp82 = load i64* %tmp81, align 8
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
+  tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
+  %tmp83 = load i64* %tmp33, align 8
+  %tmp84 = add i64 %tmp83, -24
+  %tmp85 = inttoptr i64 %tmp84 to i64*
+  %tmp86 = load i64* %tmp85, align 8
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
+  tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
+  ret i64 10
+}
+
+; Test small patchpoints that don't emit calls.
+define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: small_patchpoint_codegen:
+; CHECK:      Ltmp
+; CHECK:      nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NOT:  nop
+; CHECK: blr
+  %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
+
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll b/llvm/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll

new file mode 100644 (file)

index 0000000..368ddc5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-gnu-linux | FileCheck %s
+
+define void @test_shadow_optimization() {
+entry:
+; Expect 12 bytes worth of nops here rather than 32: With the shadow optimization
+; in place, 20 bytes will be consumed by the frame teardown and return instr.
+; CHECK-LABEL: test_shadow_optimization:
+
+; CHECK:      nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NOT:  nop
+; CHECK: addi 1, 1, 64
+; CHECK: ld [[REG1:[0-9]+]], 16(1)
+; CHECK: ld 31, -8(1)
+; CHECK: mtlr [[REG1]]
+; CHECK: blr
+
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64  0, i32  32)
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll b/llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll

new file mode 100644 (file)

index 0000000..9be8d0c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll
@@ -0,0 +1,289 @@
+; RUN: llc                             < %s | FileCheck %s
+;
+; Note: Print verbose stackmaps using -debug-only=stackmaps.
+
+; We are not getting the correct stack alignment when cross compiling for arm64.
+; So specify a datalayout here.
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; CHECK-LABEL:  .section  .llvm_stackmaps
+; CHECK-NEXT:  __LLVM_StackMaps:
+; Header
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 0
+; Num Functions
+; CHECK-NEXT:   .long 11
+; Num LargeConstants
+; CHECK-NEXT:   .long 2
+; Num Callsites
+; CHECK-NEXT:   .long 11
+
+; Functions and stack size
+; CHECK-NEXT:   .quad constantargs
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad osrinline
+; CHECK-NEXT:   .quad 144
+; CHECK-NEXT:   .quad osrcold
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad propertyRead
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad propertyWrite
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad jsVoidCall
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad jsIntCall
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad spilledValue
+; CHECK-NEXT:   .quad 320
+; CHECK-NEXT:   .quad spilledStackMapValue
+; CHECK-NEXT:   .quad 224
+; CHECK-NEXT:   .quad liveConstant
+; CHECK-NEXT:   .quad 64
+; CHECK-NEXT:   .quad clobberLR
+; CHECK-NEXT:   .quad 208
+
+; Num LargeConstants
+; CHECK-NEXT:   .quad   4294967295
+; CHECK-NEXT:   .quad   4294967296
+
+; Constant arguments
+;
+; CHECK-NEXT:   .quad   1
+; CHECK-NEXT:   .long   .L{{.*}}-.L.constantargs
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  4
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   65535
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   65536
+; SmallConstant
+; CHECK-NEXT:   .byte   5
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+; LargeConstant at index 0
+; CHECK-NEXT:   .byte   5
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   1
+
+define void @constantargs() {
+entry:
+  %0 = inttoptr i64 244837814094590 to i8*
+  tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 24, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
+  ret void
+}
+
+; Inline OSR Exit
+;
+; CHECK-LABEL:  .long   .L{{.*}}-.L.osrinline
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long  0
+define void @osrinline(i64 %a, i64 %b) {
+entry:
+  ; Runtime void->void call.
+  call void inttoptr (i64 244837814094590 to void ()*)()
+  ; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
+  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
+  ret void
+}
+
+; Cold OSR Exit
+;
+; 2 live variables in register.
+;
+; CHECK-LABEL:  .long   .L{{.*}}-.L.osrcold
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long  0
+define void @osrcold(i64 %a, i64 %b) {
+entry:
+  %test = icmp slt i64 %a, %b
+  br i1 %test, label %ret, label %cold
+cold:
+  ; OSR patchpoint with 12-byte nop-slide and 2 live vars.
+  %thunk = inttoptr i64 244837814094590 to i8*
+  call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4, i32 24, i8* %thunk, i32 0, i64 %a, i64 %b)
+  unreachable
+ret:
+  ret void
+}
+
+; Property Read
+; CHECK-LABEL:  .long   .L{{.*}}-.L.propertyRead
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+;
+; FIXME: There are currently no stackmap entries. After moving to
+; AnyRegCC, we will have entries for the object and return value.
+define i64 @propertyRead(i64* %obj) {
+entry:
+  %resolveRead = inttoptr i64 244837814094590 to i8*
+  %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %resolveRead, i32 1, i64* %obj)
+  %add = add i64 %result, 3
+  ret i64 %add
+}
+
+; Property Write
+; CHECK-LABEL:  .long   .L{{.*}}-.L.propertyWrite
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
+entry:
+  %resolveWrite = inttoptr i64 244837814094590 to i8*
+  call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 24, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
+  ret void
+}
+
+; Void JS Call
+;
+; 2 live variables in registers.
+;
+; CHECK-LABEL:  .long   .L{{.*}}-.L.jsVoidCall
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
+entry:
+  %resolveCall = inttoptr i64 244837814094590 to i8*
+  call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 7, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+  ret void
+}
+
+; i64 JS Call
+;
+; 2 live variables in registers.
+;
+; CHECK-LABEL:  .long   .L{{.*}}-.L.jsIntCall
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
+entry:
+  %resolveCall = inttoptr i64 244837814094590 to i8*
+  %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 8, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+  %add = add i64 %result, 3
+  ret i64 %add
+}
+
+; Spilled stack map values.
+;
+; Verify 28 stack map entries.
+;
+; CHECK-LABEL:  .long .L{{.*}}-.L.spilledValue
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .short 28
+;
+; Check that at least one is a spilled entry from r31.
+; Location: Indirect FP + ...
+; CHECK:        .byte 3
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short 31
+define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) {
+entry:
+  call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 11, i32 24, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
+  ret void
+}
+
+; Spilled stack map values.
+;
+; Verify 30 stack map entries.
+;
+; CHECK-LABEL:  .long .L{{.*}}-.L.spilledStackMapValue
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .short 30
+;
+; Check that at least one is a spilled entry from r31.
+; Location: Indirect FP + ...
+; CHECK:        .byte 3
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short 31
+define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29) {
+entry:
+  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
+  ret void
+}
+
+
+; Map a constant value.
+;
+; CHECK-LABEL:  .long .L{{.*}}-.L.liveConstant
+; CHECK-NEXT:   .short 0
+; 1 location
+; CHECK-NEXT:   .short 1
+; Loc 0: SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   33
+
+define void @liveConstant() {
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 8, i32 33)
+  ret void
+}
+
+; Map a value when LR is the only free register.
+;
+; CHECK-LABEL:  .long .L{{.*}}-.L.clobberLR
+; CHECK-NEXT:   .short 0
+; 1 location
+; CHECK-NEXT:   .short 1
+; Loc 0: Indirect FP (r31) - offset
+; CHECK-NEXT:   .byte   3
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .short  31
+; CHECK-NEXT:   .long   {{[0-9]+}}
+define void @clobberLR(i32 %a) {
+  tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
author	Hal Finkel <hfinkel@anl.gov>
	Tue, 13 Jan 2015 17:48:12 +0000 (17:48 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Tue, 13 Jan 2015 17:48:12 +0000 (17:48 +0000)
llvm/docs/StackMaps.rst		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCCallingConv.h	[new file with mode: 0644]	patch \| blob
llvm/lib/Target/PowerPC/PPCCallingConv.td		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCFastISel.cpp		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCFrameLowering.cpp		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCRegisterInfo.h		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp		patch \| blob \| history
llvm/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/PowerPC/ppc64-anyregcc.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/PowerPC/ppc64-patchpoint.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/PowerPC/ppc64-stackmap.ll	[new file with mode: 0644]	patch \| blob