Dangling probes are the probes associated to an empty block. This usually happens when all real instructions are optimized away from the block. There is a problem with dangling probes during the offline counts processing. The way the sample profiler works is that samples collected on the first physical instruction following a probe will be counted towards the probe. This logically equals to treating the instruction next to a probe as if it is from the same block of the probe. In the dangling probe case, the real instruction following a dangling probe actually starts a new block, and samples collected on the new block may cause issues when counted towards the empty block.
To mitigate this issue, we first try to move around a dangling probe inside its owning block. If there are still native instructions preceding the probe in the same block, we can then use them as a place holder to collect samples for the probe. A pass is added to walk each block backwards looking for probes not followed by any real instruction and moving them before the first real instruction. This is done right before the object emission.
If we are unlucky to find such in-block preceding instructions for a probe, the solution we are taking is to tag such probe as dangling so that the samples reported for them will not be trusted by the compiler. We leave it up to the counts inference algorithm to get such probes a reasonable count. The number `UINT64_MAX` is used to mark sample count as collected for a dangling probe.
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D95962
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ArrayRecycler.h"
bool isPseudoProbe() const {
return getOpcode() == TargetOpcode::PSEUDO_PROBE;
}
-
+
// True if the instruction represents a position in the function.
bool isPosition() const { return isLabel() || isCFIInstruction(); }
}
}
+ PseudoProbeAttributes getPseudoProbeAttribute() const {
+ assert(isPseudoProbe() && "Must be a pseudo probe instruction");
+ return (PseudoProbeAttributes)getOperand(3).getImm();
+ }
+
+ void addPseudoProbeAttribute(PseudoProbeAttributes Attr) {
+ assert(isPseudoProbe() && "Must be a pseudo probe instruction");
+ MachineOperand &AttrOperand = getOperand(3);
+ AttrOperand.setImm(AttrOperand.getImm() | (uint32_t)Attr);
+ }
+
private:
/// If this instruction is embedded into a MachineFunction, return the
/// MachineRegisterInfo object for the current function, otherwise
enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
+enum class PseudoProbeAttributes {
+ Reserved = 0x1, // Reserved for future use.
+ Dangling = 0x2, // The probe is dangling.
+};
+
// The saturated distrution factor representing 100% for block probes.
constexpr static uint64_t PseudoProbeFullDistributionFactor =
std::numeric_limits<uint64_t>::max();
// TYPE (uint4)
// 0 - block probe, 1 - indirect call, 2 - direct call
// ATTRIBUTE (uint3)
-// reserved
+// 1 - reserved, 2 - dangling
// ADDRESS_TYPE (uint1)
// 0 - code address, 1 - address delta
// CODE_ADDRESS (uint64 or ULEB128)
/// Merge the samples in \p Other into this record.
/// Optionally scale sample counts by \p Weight.
- sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) {
- sampleprof_error Result = addSamples(Other.getSamples(), Weight);
- for (const auto &I : Other.getCallTargets()) {
- MergeResult(Result, addCalledTarget(I.first(), I.second, Weight));
- }
- return Result;
- }
-
+ sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1);
void print(raw_ostream &OS, unsigned Indent) const;
void dump() const;
// For CSSPGO, in order to conserve profile size, we no longer write out
// locations profile for those not hit during training, so we need to
// treat them as zero instead of error here.
- if (ProfileIsCS)
- return 0;
- return std::error_code();
- // A missing counter for a probe likely means the probe was not executed.
- // Treat it as a zero count instead of an unknown count to help edge
- // weight inference.
- if (FunctionSamples::ProfileIsProbeBased)
+ if (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsProbeBased)
return 0;
return std::error_code();
} else {
+ // Return error for an invalid sample count which is usually assigned to
+ // dangling probe.
+ if (FunctionSamples::ProfileIsProbeBased &&
+ ret->second.getSamples() == FunctionSamples::InvalidProbeCount)
+ return std::error_code();
return ret->second.getSamples();
}
}
const DILocation *DIL,
SampleProfileReaderItaniumRemapper *Remapper = nullptr) const;
+ // The invalid sample count is used to represent samples collected for a
+ // dangling probe.
+ static constexpr uint64_t InvalidProbeCount = UINT64_MAX;
+
static bool ProfileIsProbeBased;
static bool ProfileIsCS;
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Target/TargetMachine.h"
#include <unordered_map>
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
+ MachineInstr *FirstInstr = nullptr;
for (MachineInstr &MI : MBB) {
+ if (!MI.isPseudo())
+ FirstInstr = &MI;
if (MI.isCall()) {
if (DILocation *DL = MI.getDebugLoc()) {
auto Value = DL->getDiscriminator();
}
}
}
+
+ // Walk the block backwards, move PSEUDO_PROBE before the first real
+ // instruction to fix out-of-order probes. There is a problem with probes
+ // as the terminator of the block. During the offline counts processing,
+ // the samples collected on the first physical instruction following a
+ // probe will be counted towards the probe. This logically equals to
+ // treating the instruction next to a probe as if it is from the same
+ // block of the probe. This is accurate most of the time unless the
+ // instruction can be reached from multiple flows, which means it actually
+ // starts a new block. Samples collected on such probes may cause
+ // imprecision with the counts inference algorithm. Fortunately, if
+ // there are still other native instructions preceding the probe we can
+ // use them as a place holder to collect samples for the probe.
+ if (FirstInstr) {
+ auto MII = MBB.rbegin();
+ while (MII != MBB.rend()) {
+ // Skip all pseudo probes followed by a real instruction since they
+ // are not dangling.
+ if (!MII->isPseudo())
+ break;
+ auto Cur = MII++;
+ if (Cur->getOpcode() != TargetOpcode::PSEUDO_PROBE)
+ continue;
+ // Move the dangling probe before FirstInstr.
+ auto *ProbeInstr = &*Cur;
+ MBB.remove(ProbeInstr);
+ MBB.insert(FirstInstr, ProbeInstr);
+ Changed = true;
+ }
+ } else {
+ // Probes not surrounded by any real instructions in the same block are
+ // called dangling probes. Since there's no good way to pick up a sample
+ // collection point for dangling probes at compile time, they are being
+ // tagged so that the profile correlation tool will not report any
+ // samples collected for them and it's up to the counts inference tool
+ // to get them a reasonable count.
+ for (MachineInstr &MI : MBB) {
+ if (MI.isPseudoProbe())
+ MI.addPseudoProbeAttribute(PseudoProbeAttributes::Dangling);
+ }
+ }
}
return Changed;
return OS;
}
+/// Merge the samples in \p Other into this record.
+/// Optionally scale sample counts by \p Weight.
+sampleprof_error SampleRecord::merge(const SampleRecord &Other,
+ uint64_t Weight) {
+ sampleprof_error Result;
+ // With pseudo probes, merge a dangling sample with a non-dangling sample
+ // should result in a dangling sample.
+ if (FunctionSamples::ProfileIsProbeBased &&
+ (getSamples() == FunctionSamples::InvalidProbeCount ||
+ Other.getSamples() == FunctionSamples::InvalidProbeCount)) {
+ NumSamples = FunctionSamples::InvalidProbeCount;
+ Result = sampleprof_error::success;
+ } else {
+ Result = addSamples(Other.getSamples(), Weight);
+ }
+ for (const auto &I : Other.getCallTargets()) {
+ MergeResult(Result, addCalledTarget(I.first(), I.second, Weight));
+ }
+ return Result;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void LineLocation::dump() const { print(dbgs()); }
#endif
--- /dev/null
+
+# REQUIRES: x86-registered-target
+# Ensure llc can read and parse MIR pseudo probe operations.
+# RUN: llc -mtriple x86_64-- -run-pass=pseudo-probe-inserter %s -o - | FileCheck %s
+
+# CHECK: PSEUDO_PROBE 6699318081062747564, 1, 0, 0
+# check probe 2 is moved before the test instruction.
+# CHECK: PSEUDO_PROBE 6699318081062747564, 2, 0, 0
+# CHECK: TEST32rr
+# check probe 3 is dangling.
+# CHECK: PSEUDO_PROBE 6699318081062747564, 3, 0, 2
+
+name: foo
+body: |
+ bb.0:
+ TEST32rr killed renamable $edi, renamable $edi, implicit-def $eflags
+ PSEUDO_PROBE 6699318081062747564, 1, 0, 0
+ JCC_1 %bb.1, 4, implicit $eflags
+
+ bb.2:
+ TEST32rr killed renamable $edi, renamable $edi, implicit-def $eflags
+ PSEUDO_PROBE 6699318081062747564, 2, 0, 0
+
+ bb.1:
+ PSEUDO_PROBE 6699318081062747564, 3, 0, 0
+
+...
foo:3200:13
1: 13
2: 7
- 3: 6
+ 3: 18446744073709551615
4: 13
5: 7 _Z3foov:5 _Z3barv:2
6: 6 _Z3barv:4 _Z3foov:2
# Tests for merge of probe-based profile files.
+# Check the dangling probe 3 ends up with 18446744073709551615 (INT64_MAX), i.e, not aggregated.
RUN: llvm-profdata merge --sample --text %p/Inputs/pseudo-probe-profile.proftext -o - | FileCheck %s --check-prefix=MERGE1
RUN: llvm-profdata merge --sample --extbinary %p/Inputs/pseudo-probe-profile.proftext -o %t && llvm-profdata merge --sample --text %t -o - | FileCheck %s --check-prefix=MERGE1
MERGE1: foo:3200:13
MERGE1: 1: 13
MERGE1: 2: 7
-MERGE1: 3: 6
+MERGE1: 3: 18446744073709551615
MERGE1: 4: 13
MERGE1: 5: 7 _Z3foov:5 _Z3barv:2
MERGE1: 6: 6 _Z3barv:4 _Z3foov:2
MERGE2: foo:6400:26
MERGE2: 1: 26
MERGE2: 2: 14
-MERGE2: 3: 12
+MERGE2: 3: 18446744073709551615
MERGE2: 4: 26
MERGE2: 5: 14 _Z3foov:10 _Z3barv:4
MERGE2: 6: 12 _Z3barv:8 _Z3foov:4