Same dangling probes are redundant since they all have the same semantic that is to rely on the counts inference tool to get reasonable count for the same original block. Therefore, there's no need to keep multiple copies of them. I've seen jump threading created tons of redundant dangling probes that slowed down the compiler dramatically. Other optimization passes can also result in redundant probes though without an observed impact so far.
This change removes block-wise redundant dangling probes specifically introduced by jump threading. To support removing redundant dangling probes caused by all other passes, a final function-wise deduplication is also added.
An 18% size win of the .pseudo_probe section was seen for SPEC2017. No performance difference was observed.
Differential Revision: https://reviews.llvm.org/D97482
void setProbeDistributionFactor(Instruction &Inst, float Factor);
bool moveAndDanglePseudoProbes(BasicBlock *From, Instruction *To);
+
+bool removeRedundantPseudoProbes(BasicBlock *Block);
} // end namespace llvm
#endif // LLVM_IR_PSEUDOPROBE_H
DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr);
/// Try to remove redundant dbg.value instructions from given basic block.
-/// Returns true if at least one instruction was removed.
-bool RemoveRedundantDbgInstrs(BasicBlock *BB);
+/// Returns true if at least one instruction was removed. Remove redundant
+/// pseudo ops when RemovePseudoOp is true.
+bool RemoveRedundantDbgInstrs(BasicBlock *BB, bool RemovePseudoOp = false);
/// Replace all uses of an instruction (specified by BI) with a value, then
/// remove and delete the original instruction.
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Target/TargetMachine.h"
-#include <unordered_map>
+#include <unordered_set>
#define DEBUG_TYPE "pseudo-probe-inserter"
}
}
+ // Remove redundant dangling probes. Same dangling probes are redundant
+ // since they all have the same semantic that is to rely on the counts
+ // inference too to get reasonable count for the same original block.
+ // Therefore, there's no need to keep multiple copies of them.
+ auto Hash = [](const MachineInstr *MI) {
+ return std::hash<uint64_t>()(MI->getOperand(0).getImm()) ^
+ std::hash<uint64_t>()(MI->getOperand(1).getImm());
+ };
+
+ auto IsEqual = [](const MachineInstr *Left, const MachineInstr *Right) {
+ return Left->getOperand(0).getImm() == Right->getOperand(0).getImm() &&
+ Left->getOperand(1).getImm() == Right->getOperand(1).getImm() &&
+ Left->getOperand(3).getImm() == Right->getOperand(3).getImm() &&
+ Left->getDebugLoc() == Right->getDebugLoc();
+ };
+
+ SmallVector<MachineInstr *, 4> ToBeRemoved;
+ std::unordered_set<MachineInstr *, decltype(Hash), decltype(IsEqual)>
+ DanglingProbes(0, Hash, IsEqual);
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.isPseudoProbe()) {
+ if ((uint32_t)MI.getPseudoProbeAttribute() &
+ (uint32_t)PseudoProbeAttributes::Dangling)
+ if (!DanglingProbes.insert(&MI).second)
+ ToBeRemoved.push_back(&MI);
+ }
+ }
+ }
+
+ for (auto *MI : ToBeRemoved)
+ MI->eraseFromParent();
+
+ Changed |= !ToBeRemoved.empty();
return Changed;
}
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include <unordered_set>
using namespace llvm;
return !ToBeMoved.empty();
}
+
+/// Same dangling probes in one blocks are redundant since they all have the
+/// same semantic that is to rely on the counts inference too to get reasonable
+/// count for the same original block. Therefore, there's no need to keep
+/// multiple copies of them.
+bool removeRedundantPseudoProbes(BasicBlock *Block) {
+
+ auto Hash = [](const PseudoProbeInst *I) {
+ return std::hash<uint64_t>()(I->getFuncGuid()->getZExtValue()) ^
+ std::hash<uint64_t>()(I->getIndex()->getZExtValue());
+ };
+
+ auto IsEqual = [](const PseudoProbeInst *Left, const PseudoProbeInst *Right) {
+ return Left->getFuncGuid() == Right->getFuncGuid() &&
+ Left->getIndex() == Right->getIndex() &&
+ Left->getAttributes() == Right->getAttributes() &&
+ Left->getDebugLoc() == Right->getDebugLoc();
+ };
+
+ SmallVector<PseudoProbeInst *, 4> ToBeRemoved;
+ std::unordered_set<PseudoProbeInst *, decltype(Hash), decltype(IsEqual)>
+ DanglingProbes(0, Hash, IsEqual);
+
+ for (auto &I : *Block) {
+ if (auto *II = dyn_cast<PseudoProbeInst>(&I)) {
+ if (II->getAttributes()->getZExtValue() &
+ (uint32_t)PseudoProbeAttributes::Dangling)
+ if (!DanglingProbes.insert(II).second)
+ ToBeRemoved.push_back(II);
+ }
+ }
+
+ for (auto *I : ToBeRemoved)
+ I->eraseFromParent();
+ return !ToBeRemoved.empty();
+}
} // namespace llvm
// Jump threading may have introduced redundant debug values into BB
// which should be removed.
+ // Remove redundant pseudo probes as well.
if (Changed)
- RemoveRedundantDbgInstrs(&BB);
+ RemoveRedundantDbgInstrs(&BB, true);
// Stop processing BB if it's the entry or is now deleted. The following
// routines attempt to eliminate BB and locating a suitable replacement
// detect and transform nested loops later.
!LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
- RemoveRedundantDbgInstrs(Succ);
+ RemoveRedundantDbgInstrs(Succ, true);
// BB is valid for cleanup here because we passed in DTU. F remains
// BB's parent until a DTU->getDomTree() event.
LVI->eraseBlock(&BB);
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
return !ToBeRemoved.empty();
}
-bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB) {
+bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB, bool RemovePseudoOp) {
bool MadeChanges = false;
// By using the "backward scan" strategy before the "forward scan" strategy we
// can remove both dbg.value (2) and (3) in a situation like this:
// already is described as having the value V1 at (1).
MadeChanges |= removeRedundantDbgInstrsUsingBackwardScan(BB);
MadeChanges |= removeRedundantDbgInstrsUsingForwardScan(BB);
+ if (RemovePseudoOp)
+ MadeChanges |= removeRedundantPseudoProbes(BB);
if (MadeChanges)
LLVM_DEBUG(dbgs() << "Removed redundant dbg instrs from: "
; MIR-tail: PSEUDO_PROBE [[#GUID:]], 3, 0, 2
; MIR-tail: PSEUDO_PROBE [[#GUID:]], 4, 0, 0
+
+define void @foo2() {
+bb:
+ %tmp = call i32 @f1()
+ %tmp1 = icmp eq i32 %tmp, 1
+ br i1 %tmp1, label %bb5, label %bb8
+
+bb2:
+ %tmp4 = icmp ne i32 %tmp, 1
+ switch i1 %tmp4, label %bb2 [
+ i1 0, label %bb5
+ i1 1, label %bb8
+ ]
+
+bb5:
+;; Check the pseudo probe with id 3 only has one copy.
+; JT-COUNT-1: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 3, i32 2, i64 -1)
+; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 3, i32 2, i64 -1)
+ %tmp6 = phi i1 [ %tmp1, %bb ], [ false, %bb2 ]
+ br i1 %tmp6, label %bb8, label %bb7
+
+bb7:
+ br label %bb8
+
+bb8:
+ ret void
+}
+
define i32 @test(i32 %a, i32 %b, i32 %c) {
;; Check block bb1 and bb2 are gone, and their probes (probe 2 and 3) are dangling.
; SC-LABEL: @test(
--- /dev/null
+; REQUIRES: x86_64-linux
+; RUN: llc -pseudo-probe-for-profiling %s -filetype=asm -o - | FileCheck %s
+
+declare i32 @f1()
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
+
+define void @foo2() {
+bb:
+; CHECK: .pseudoprobe 2494702099028631698 1 0 0
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 1, i32 0, i64 -1)
+ %tmp = call i32 @f1()
+ %tmp1 = icmp eq i32 %tmp, 1
+ br i1 %tmp1, label %bb5, label %bb8
+
+bb2:
+;; Check the pseudo probe with id 2 only has one copy.
+; CHECK-COUNT-1: .pseudoprobe 2494702099028631698 2 0 2
+; CHECK-NOT: .pseudoprobe 2494702099028631698 2 0 2
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1)
+ %tmp4 = icmp ne i32 %tmp, 1
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1)
+ switch i1 %tmp4, label %bb2 [
+ i1 0, label %bb5
+ i1 1, label %bb8
+ ]
+
+bb5:
+ %tmp6 = phi i1 [ %tmp1, %bb ], [ false, %bb2 ]
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1)
+ br i1 %tmp6, label %bb8, label %bb7
+
+bb7:
+ br label %bb8
+
+bb8:
+ ret void
+}
+
+!llvm.pseudo_probe_desc = !{!0}
+
+!0 = !{i64 2494702099028631698, i64 281612674956943, !"foo2", null}
\ No newline at end of file