From d0e42037bf0ce110cc8c6e9b536687fa40e90b99 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Thu, 10 Sep 2020 11:21:19 -0700 Subject: [PATCH] [CSSPGO] MIR target-independent pseudo instruction for pseudo-probe intrinsic This change introduces a MIR target-independent pseudo instruction corresponding to the IR intrinsic llvm.pseudoprobe for pseudo-probe block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story. An `llvm.pseudoprobe` intrinsic call will be lowered into a target-independent operation named `PSEUDO_PROBE`. Given the following instrumented IR, ``` define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 { bb0: %cmp = icmp eq i32 %x, 0 call void @llvm.pseudoprobe(i64 837061429793323041, i64 1, i32 0) br i1 %cmp, label %bb1, label %bb2 bb1: call void @llvm.pseudoprobe(i64 837061429793323041, i64 2, i32 0) br label %bb3 bb2: call void @llvm.pseudoprobe(i64 837061429793323041, i64 3, i32 0) br label %bb3 bb3: call void @llvm.pseudoprobe(i64 837061429793323041, i64 4, i32 0) ret void } ``` the corresponding MIR is shown below. Note that block `bb3` is duplicated into `bb1` and `bb2` where its probe is duplicated too. This allows for an accurate execution count to be collected for `bb3`, which is basically the sum of the counts of `bb1` and `bb2`. ``` bb.0.bb0: frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp TEST32rr killed renamable $edi, renamable $edi, implicit-def $eflags PSEUDO_PROBE 837061429793323041, 1, 0, 0 $edi = MOV32ri 1, debug-location !13; test.c:0 JCC_1 %bb.1, 4, implicit $eflags bb.2.bb2: PSEUDO_PROBE 837061429793323041, 3, 0, 0 PSEUDO_PROBE 837061429793323041, 4, 0, 0 $rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp RETQ bb.1.bb1: PSEUDO_PROBE 837061429793323041, 2, 0, 0 PSEUDO_PROBE 837061429793323041, 4, 0, 0 $rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp RETQ ``` The target op PSEUDO_PROBE will be converted into a piece of binary data by the object emitter with no machine instructions generated. 
This is done in a different patch. Reviewed By: wmi Differential Revision: https://reviews.llvm.org/D86495 --- llvm/include/llvm/CodeGen/ISDOpcodes.h | 4 +++ llvm/include/llvm/CodeGen/MachineInstr.h | 1 + llvm/include/llvm/CodeGen/SelectionDAG.h | 6 +++++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 26 +++++++++++++++++++ llvm/include/llvm/Support/TargetOpcodes.def | 3 +++ llvm/include/llvm/Target/Target.td | 7 ++++++ llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 14 +++++++++++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 29 ++++++++++++++++++++++ .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 8 ++++++ .../CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 2 ++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 1 + .../test/Transforms/SampleProfile/pseudo-probe.mir | 29 ++++++++++++++++++++++ 12 files changed, 130 insertions(+) create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe.mir diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 269bb14..2cacb57 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1106,6 +1106,10 @@ enum NodeType { /// known nonzero constant. The only operand here is the chain. GET_DYNAMIC_AREA_OFFSET, + /// Pseudo probe for AutoFDO, as a place holder in a basic block to improve + /// the sample counts quality. + PSEUDO_PROBE, + /// VSCALE(IMM) - Returns the runtime scaling factor used to calculate the /// number of elements within a scalable vector. IMM is a constant integer /// multiplier that is applied to the runtime value. 
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 6247f2a..6bbe2d0 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1261,6 +1261,7 @@ public: case TargetOpcode::DBG_LABEL: case TargetOpcode::LIFETIME_START: case TargetOpcode::LIFETIME_END: + case TargetOpcode::PSEUDO_PROBE: return true; } } diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 42fc296..8966e7f5 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1205,6 +1205,12 @@ public: SDValue getLifetimeNode(bool IsStart, const SDLoc &dl, SDValue Chain, int FrameIndex, int64_t Size, int64_t Offset = -1); + /// Creates a PseudoProbeSDNode with function GUID `Guid` and + /// the index of the block `Index` it is probing, as well as the attributes + /// `Attr` of the probe. + SDValue getPseudoProbeNode(const SDLoc &Dl, SDValue Chain, uint64_t Guid, + uint64_t Index, uint32_t Attr); + /// Create a MERGE_VALUES node from the given operands. SDValue getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index eaad25c..83158e2 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1749,6 +1749,32 @@ public: } }; +/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and +/// the index of the basic block being probed. A pseudo probe serves as a place +/// holder and will be removed at the end of compilation. Apart from the chain, +/// it has no operands so that instruction selection need not deal with any. 
+class PseudoProbeSDNode : public SDNode { + friend class SelectionDAG; + uint64_t Guid; + uint64_t Index; + uint32_t Attributes; + + PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl, + SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr) + : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index), + Attributes(Attr) {} + +public: + uint64_t getGuid() const { return Guid; } + uint64_t getIndex() const { return Index; } + uint32_t getAttributes() const { return Attributes; } + + // Support LLVM-style RTTI (isa/cast/dyn_cast) by matching on the opcode. + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::PSEUDO_PROBE; + } +}; + class JumpTableSDNode : public SDNode { friend class SelectionDAG; diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 38eb51b..a63d404 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -110,6 +110,9 @@ HANDLE_TARGET_OPCODE(BUNDLE) HANDLE_TARGET_OPCODE(LIFETIME_START) HANDLE_TARGET_OPCODE(LIFETIME_END) +/// Pseudo probe, a placeholder instruction that marks a basic block for AutoFDO. +HANDLE_TARGET_OPCODE(PSEUDO_PROBE) + /// A Stackmap instruction captures the location of live variables at its /// position in the instruction stream. It is followed by a shadow of bytes /// that must lie within the function and not contain another stackmap. 
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index 96fbfcc..9664f70 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1145,6 +1145,13 @@ def LIFETIME_END : StandardPseudoInstruction { let AsmString = "LIFETIME_END"; let hasSideEffects = false; } +def PSEUDO_PROBE : StandardPseudoInstruction { + let OutOperandList = (outs); + let InOperandList = (ins i64imm:$guid, i64imm:$index, i8imm:$type, i32imm:$attr); + let AsmString = "PSEUDO_PROBE"; + let hasSideEffects = true; +} + def STACKMAP : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i64imm:$id, i32imm:$nbytes, variable_ops); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 95272d5..2b208cee 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -1124,6 +1124,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } + case ISD::PSEUDO_PROBE: { + unsigned TarOp = TargetOpcode::PSEUDO_PROBE; + auto Guid = cast<PseudoProbeSDNode>(Node)->getGuid(); + auto Index = cast<PseudoProbeSDNode>(Node)->getIndex(); + auto Attr = cast<PseudoProbeSDNode>(Node)->getAttributes(); + + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) + .addImm(Guid) + .addImm(Index) + .addImm(0) // 0 for block probes + .addImm(Attr); + break; + } + case ISD::INLINEASM: case ISD::INLINEASM_BR: { unsigned NumOps = Node->getNumOperands(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2ae6f1c..489651e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -572,6 +572,11 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset()); } break; + case ISD::PSEUDO_PROBE: + ID.AddInteger(cast<PseudoProbeSDNode>(N)->getGuid()); + ID.AddInteger(cast<PseudoProbeSDNode>(N)->getIndex()); + 
ID.AddInteger(cast(N)->getAttributes()); + break; case ISD::JumpTable: case ISD::TargetJumpTable: ID.AddInteger(cast(N)->getIndex()); @@ -6883,6 +6888,30 @@ SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, return V; } +SDValue SelectionDAG::getPseudoProbeNode(const SDLoc &Dl, SDValue Chain, + uint64_t Guid, uint64_t Index, + uint32_t Attr) { + const unsigned Opcode = ISD::PSEUDO_PROBE; + const auto VTs = getVTList(MVT::Other); + SDValue Ops[] = {Chain}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops); + ID.AddInteger(Guid); + ID.AddInteger(Index); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, Dl, IP)) + return SDValue(E, 0); + + auto *N = newSDNode( + Opcode, Dl.getIROrder(), Dl.getDebugLoc(), VTs, Guid, Index, Attr); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 82056aa..959bb52 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6590,6 +6590,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } return; } + case Intrinsic::pseudoprobe: { + auto Guid = cast(I.getArgOperand(0))->getZExtValue(); + auto Index = cast(I.getArgOperand(1))->getZExtValue(); + auto Attr = cast(I.getArgOperand(2))->getZExtValue(); + Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr); + DAG.setRoot(Res); + return; + } case Intrinsic::invariant_start: // Discard region information. 
setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index df35f05..17a4a5c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -396,6 +396,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::DEBUGTRAP: return "debugtrap"; case ISD::LIFETIME_START: return "lifetime.start"; case ISD::LIFETIME_END: return "lifetime.end"; + case ISD::PSEUDO_PROBE: + return "pseudoprobe"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; case ISD::GC_TRANSITION_END: return "gc_transition.end"; case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3f3eb35..6c73842 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2808,6 +2808,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::ANNOTATION_LABEL: case ISD::LIFETIME_START: case ISD::LIFETIME_END: + case ISD::PSEUDO_PROBE: NodeToMatch->setNodeId(-1); // Mark selected. return; case ISD::AssertSext: diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe.mir b/llvm/test/Transforms/SampleProfile/pseudo-probe.mir new file mode 100644 index 0000000..8175a47 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe.mir @@ -0,0 +1,29 @@ + +# REQUIRES: x86-registered-target +# Ensure llc can read and parse MIR pseudo probe operations. 
+# RUN: llc -O0 -mtriple x86_64-- -run-pass none %s -o - | FileCheck %s + +# CHECK: PSEUDO_PROBE 6699318081062747564, 1, 0, 0 +# CHECK: PSEUDO_PROBE 6699318081062747564, 3, 0, 0 +# CHECK: PSEUDO_PROBE 6699318081062747564, 4, 0, 0 +# CHECK: PSEUDO_PROBE 6699318081062747564, 2, 0, 0 +# CHECK: PSEUDO_PROBE 6699318081062747564, 4, 0, 0 + +name: foo +body: | + bb.0: + TEST32rr killed renamable $edi, renamable $edi, implicit-def $eflags + PSEUDO_PROBE 6699318081062747564, 1, 0, 0 + JCC_1 %bb.1, 4, implicit $eflags + + bb.2: + PSEUDO_PROBE 6699318081062747564, 3, 0, 0 + PSEUDO_PROBE 6699318081062747564, 4, 0, 0 + RETQ + + bb.1: + PSEUDO_PROBE 6699318081062747564, 2, 0, 0 + PSEUDO_PROBE 6699318081062747564, 4, 0, 0 + RETQ + +... -- 2.7.4