Currenlty PseudoProbeInserter is a pass conditioned on a target switch. It works well with a single clang invocation. It doesn't work so well when the backend is called separately (i.e, through the linker or llc), where user has always to pass -pseudo-probe-for-profiling explictly. I'm making the pass a default pass that requires no command line arg to trigger, but will be actually run depending on whether the CU comes with `llvm.pseudo_probe_desc` metadata.
Reviewed By: wenlei
Differential Revision: https://reviews.llvm.org/D110209
Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection;
Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo;
Options.EnableAIXExtendedAltivecABI = CodeGenOpts.EnableAIXExtendedAltivecABI;
- Options.PseudoProbeForProfiling = CodeGenOpts.PseudoProbeForProfiling;
Options.ValueTrackingVariableLocations =
CodeGenOpts.ValueTrackingVariableLocations;
Options.XRayOmitFunctionIndex = CodeGenOpts.XRayOmitFunctionIndex;
bool ltoDebugPassManager;
bool ltoEmitAsm;
bool ltoNewPassManager;
- bool ltoPseudoProbeForProfiling;
bool ltoUniqueBasicBlockSectionNames;
bool ltoWholeProgramVisibility;
bool mergeArmExidx;
config->ltoo = args::getInteger(args, OPT_lto_O, 2);
config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq);
config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1);
- config->ltoPseudoProbeForProfiling =
- args.hasArg(OPT_lto_pseudo_probe_for_profiling);
config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile);
config->ltoBasicBlockSections =
args.getLastArgValue(OPT_lto_basic_block_sections);
}
}
- c.Options.PseudoProbeForProfiling = config->ltoPseudoProbeForProfiling;
c.Options.UniqueBasicBlockSectionNames =
config->ltoUniqueBasicBlockSectionNames;
defm lto_whole_program_visibility: BB<"lto-whole-program-visibility",
"Asserts that the LTO link has whole program visibility",
"Asserts that the LTO link does not have whole program visibility">;
-def lto_pseudo_probe_for_profiling: F<"lto-pseudo-probe-for-profiling">,
- HelpText<"Emit pseudo probes for sample profiling">;
def disable_verify: F<"disable-verify">;
defm mllvm: Eq<"mllvm", "Additional arguments to forward to LLVM's option processing">;
def opt_remarks_filename: Separate<["--"], "opt-remarks-filename">,
def: J<"plugin-opt=opt-remarks-hotness-threshold=">,
Alias<opt_remarks_hotness_threshold>,
HelpText<"Alias for --opt-remarks-hotness-threshold">;
-def: J<"plugin-opt=pseudo-probe-for-profiling">,
- Alias<lto_pseudo_probe_for_profiling>, HelpText<"Alias for --lto-pseudo-probe-for-profiling">;
def: J<"plugin-opt=sample-profile=">,
Alias<lto_sample_profile>, HelpText<"Alias for --lto-sample-profile">;
def: F<"plugin-opt=save-temps">, Alias<save_temps>, HelpText<"Alias for --save-temps">;
; REQUIRES: x86
; RUN: opt < %s -passes=pseudo-probe -function-sections -o %t.o
-; RUN: ld.lld %t.o -shared --lto-pseudo-probe-for-profiling --lto-emit-asm -o - | FileCheck %s
-; RUN: ld.lld %t.o -shared -plugin-opt=pseudo-probe-for-profiling --lto-emit-asm -o - | FileCheck %s
+; RUN: ld.lld %t.o -shared --lto-emit-asm -o - | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-scei-ps4"
bool getEnableDebugEntryValues();
-bool getPseudoProbeForProfiling();
-
bool getValueTrackingVariableLocations();
bool getForceDwarfFrameSection();
EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false),
EmitAddrsig(false), EmitCallSiteInfo(false),
SupportsDebugEntryValues(false), EnableDebugEntryValues(false),
- PseudoProbeForProfiling(false), ValueTrackingVariableLocations(false),
+ ValueTrackingVariableLocations(false),
ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false),
DebugStrictDwarf(false),
FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {}
/// production.
bool ShouldEmitDebugEntryValues() const;
- /// Emit pseudo probes into the binary for sample profiling
- unsigned PseudoProbeForProfiling : 1;
-
// When set to true, use experimental new debug variable location tracking,
// which seeks to follow the values of variables rather than their location,
// post isel.
CGOPT(bool, EmitCallSiteInfo)
CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableDebugEntryValues)
-CGOPT(bool, PseudoProbeForProfiling)
CGOPT(bool, ValueTrackingVariableLocations)
CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
cl::init(false));
CGBINDOPT(EnableDebugEntryValues);
- static cl::opt<bool> PseudoProbeForProfiling(
- "pseudo-probe-for-profiling", cl::desc("Emit pseudo probes for AutoFDO"),
- cl::init(false));
- CGBINDOPT(PseudoProbeForProfiling);
-
static cl::opt<bool> ValueTrackingVariableLocations(
"experimental-debug-variable-locations",
cl::desc("Use experimental new value-tracking variable locations"),
Options.EmitAddrsig = getEnableAddrsig();
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
- Options.PseudoProbeForProfiling = getPseudoProbeForProfiling();
Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
MachineFunctionPass::getAnalysisUsage(AU);
}
+ bool doInitialization(Module &M) override {
+ ShouldRun = M.getNamedMetadata(PseudoProbeDescMetadataName);
+ return false;
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!ShouldRun)
+ return false;
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
Name = SP->getName();
return Function::getGUID(Name);
}
+
+ bool ShouldRun = false;
};
} // namespace
// Add passes that directly emit MI after all other MI passes.
addPreEmitPass2();
- // Insert pseudo probe annotation for callsite profiling
- if (TM->Options.PseudoProbeForProfiling)
- addPass(createPseudoProbeInserter());
-
AddingMachinePasses = false;
}
addPass(createEHContGuardCatchretPass());
}
addPass(createX86LoadValueInjectionRetHardeningPass());
+
+ // Insert pseudo probe annotation for callsite profiling
+ addPass(createPseudoProbeInserter());
}
bool X86PassConfig::addPostFastRegAllocRewrite() {
; CHECK-NEXT: X86 Speculative Execution Side Effect Suppression
; CHECK-NEXT: X86 Indirect Thunks
; CHECK-NEXT: Check CFA info and insert CFI instructions if needed
-; CHECK-NEXT: X86 Load Value Injection (LVI) Ret-Hardening
+; CHECK-NEXT: X86 Load Value Injection (LVI) Ret-Hardening
+; CHECK-NEXT: Pseudo Probe Inserter
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: X86 Assembly Printer
; CHECK-NEXT: X86 Indirect Thunks
; CHECK-NEXT: Check CFA info and insert CFI instructions if needed
; CHECK-NEXT: X86 Load Value Injection (LVI) Ret-Hardening
+; CHECK-NEXT: Pseudo Probe Inserter
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: X86 Assembly Printer
; REQUIRES: x86_64-linux
; RUN: opt < %s -passes='pseudo-probe,jump-threading' -S -o %t
; RUN: FileCheck %s < %t --check-prefix=JT
-; RUN: llc -pseudo-probe-for-profiling -function-sections <%t -filetype=asm | FileCheck %s --check-prefix=ASM
+; RUN: llc -function-sections <%t -filetype=asm | FileCheck %s --check-prefix=ASM
; RUN: opt < %s -passes='pseudo-probe' -S -o %t1
-; RUN: llc -pseudo-probe-for-profiling -stop-after=tailduplication <%t1 | FileCheck %s --check-prefix=MIR-tail
+; RUN: llc -stop-after=tailduplication <%t1 | FileCheck %s --check-prefix=MIR-tail
; RUN: opt < %s -passes='pseudo-probe,simplifycfg' -S | FileCheck %s --check-prefix=SC
declare i32 @f1()
; REQUIRES: x86_64-linux
; RUN: opt < %s -passes='pseudo-probe,cgscc(inline)' -function-sections -mtriple=x86_64-unknown-linux-gnu -S -o %t
; RUN: FileCheck %s < %t --check-prefix=CHECK-IL
-; RUN: llc -pseudo-probe-for-profiling -function-sections <%t -filetype=asm -o %t1
+; RUN: llc -function-sections <%t -filetype=asm -o %t1
; RUN: FileCheck %s < %t1 --check-prefix=CHECK-ASM
-; RUN: llc -pseudo-probe-for-profiling -function-sections <%t -filetype=obj -o %t2
+; RUN: llc -function-sections <%t -filetype=obj -o %t2
; RUN: llvm-objdump --section-headers %t2 | FileCheck %s --check-prefix=CHECK-OBJ
; RUN: llvm-mc -filetype=asm <%t1 -o %t3
; RUN: FileCheck %s < %t3 --check-prefix=CHECK-ASM
; REQUIRES: x86_64-linux
; RUN: opt < %s -passes=pseudo-probe -function-sections -S -o %t
; RUN: FileCheck %s < %t --check-prefix=CHECK-IL
-; RUN: llc %t -pseudo-probe-for-profiling -stop-after=pseudo-probe-inserter -o - | FileCheck %s --check-prefix=CHECK-MIR
-; RUN: llc %t -pseudo-probe-for-profiling -function-sections -filetype=asm -o %t1
+; RUN: llc %t -stop-after=pseudo-probe-inserter -o - | FileCheck %s --check-prefix=CHECK-MIR
+; RUN: llc %t -function-sections -filetype=asm -o %t1
; RUN: FileCheck %s < %t1 --check-prefix=CHECK-ASM
-; RUN: llc %t -pseudo-probe-for-profiling -function-sections -filetype=obj -o %t2
+; RUN: llc %t -function-sections -filetype=obj -o %t2
; RUN: llvm-objdump --section-headers %t2 | FileCheck %s --check-prefix=CHECK-OBJ
; RUN: llvm-mc %t1 -filetype=obj -o %t3
; RUN: llvm-objdump --section-headers %t3 | FileCheck %s --check-prefix=CHECK-OBJ
; REQUIRES: x86_64-linux
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-- -pseudo-probe-for-profiling -O3 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-- -O3 | FileCheck %s
define float @foo(float %x) #0 {
%tmp1 = fmul float %x, 3.000000e+00
; CHECK-NEXT: !Attributes: 1
; truncated-pseudoprobe.perfbin is from the following compile commands:
-; llc -pseudo-probe-for-profiling truncated-pseudoprobe.ll -filetype=obj -o truncated-pseudoprobe.o
+; llc truncated-pseudoprobe.ll -filetype=obj -o truncated-pseudoprobe.o
; clang truncated-pseudoprobe.o -o truncated-pseudoprobe.perfbin