With https://reviews.llvm.org/D136627, we now have metrics for profile staleness based on profile statistics. Monitoring profile staleness in real time can help users quickly identify performance issues. In a production scenario, builds are usually incremental, so to get real-time metrics we would have to store/cache the metrics of all the old objects somewhere and pull them in at post-build time. To make this more convenient, this patch adds an option to persist the metrics into the object binary, so they can be reported right away by decoding the binary rather than polling the previous stdout/stderr output from a cache system.
For the implementation, the statistics are first written into a new named metadata node (`llvm.stats`) and then encoded into a special ELF `.llvm_stats` section. The section data is formatted as a list of key/value pairs so that future statistics can be added easily. This is controlled by a new switch (`-persist-profile-staleness`).
In terms of size overhead, the metrics are computed at module level, so the overhead should be small: measured on one of our internal services, it costs less than 1MB for a 10GB+ binary.
Reviewed By: wenlei
Differential Revision: https://reviews.llvm.org/D136698
/// Return metadata containing the pseudo probe descriptor for a function.
MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, Function *F);
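+ /// Return metadata containing LLVM statistics: a single node whose operands
+ /// alternate between a statistic's name (string) and its value (i64 constant).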
+ MDNode *
+ createLLVMStats(ArrayRef<std::pair<StringRef, uint64_t>> LLVMStatsVec);
+
//===------------------------------------------------------------------===//
// Range metadata.
//===------------------------------------------------------------------===//
MCSection *PseudoProbeSection = nullptr;
MCSection *PseudoProbeDescSection = nullptr;
+ // Section for metadata of llvm statistics.
+ MCSection *LLVMStatsSection = nullptr;
+
// ELF specific sections.
MCSection *DataRelROSection = nullptr;
MCSection *MergeableConst4Section = nullptr;
MCSection *getPseudoProbeDescSection(StringRef FuncName) const;
+ MCSection *getLLVMStatsSection() const;
+
MCSection *getPCSection(StringRef Name, const MCSection *TextSec) const;
// ELF specific sections.
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Base64.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
}
}
+ if (NamedMDNode *LLVMStats = M.getNamedMetadata("llvm.stats")) {
+   // Emit the llvm.stats metadata into the .llvm_stats section as a flat list
+   // of key/value records. Each record is laid out as:
+   //   ULEB128 key length, key bytes, ULEB128 value length, value bytes
+   // where the value bytes are the Base64 encoding of the integer's decimal
+   // string representation.
+   auto *S = C.getObjectFileInfo()->getLLVMStatsSection();
+   Streamer.switchSection(S);
+   for (const auto *Operand : LLVMStats->operands()) {
+     const auto *MD = cast<MDNode>(Operand);
+     assert(MD->getNumOperands() % 2 == 0 &&
+            ("Operand num should be even for a list of key/value pair"));
+     for (size_t I = 0; I < MD->getNumOperands(); I += 2) {
+       // Key: length-prefixed raw string.
+       auto *Key = cast<MDString>(MD->getOperand(I));
+       Streamer.emitULEB128IntValue(Key->getString().size());
+       Streamer.emitBytes(Key->getString());
+       // Value: the operand must be a ConstantInt. Use the checked
+       // mdconst::extract rather than dyn_extract, whose possibly-null result
+       // would otherwise be dereferenced without a check.
+       const auto *Num = mdconst::extract<ConstantInt>(MD->getOperand(I + 1));
+       std::string Value = encodeBase64(Twine(Num->getZExtValue()).str());
+       Streamer.emitULEB128IntValue(Value.size());
+       Streamer.emitBytes(Value);
+     }
+   }
+ }
+
unsigned Version = 0;
unsigned Flags = 0;
StringRef Section;
Ops[2] = createString(F->getName());
return MDNode::get(Context, Ops);
}
+
+MDNode *
+MDBuilder::createLLVMStats(ArrayRef<std::pair<StringRef, uint64_t>> LLVMStats) {
+  // Flatten the (name, value) pairs into one operand list of the form
+  // {name0, value0, name1, value1, ...}; values become i64 constants.
+  auto *CounterTy = Type::getInt64Ty(Context);
+  SmallVector<Metadata *, 4> Operands;
+  Operands.reserve(LLVMStats.size() * 2);
+  for (const auto &Stat : LLVMStats) {
+    Operands.push_back(createString(Stat.first));
+    Operands.push_back(
+        createConstant(ConstantInt::get(CounterTy, Stat.second)));
+  }
+  return MDNode::get(Context, Operands);
+}
PseudoProbeSection = Ctx->getELFSection(".pseudo_probe", DebugSecType, 0);
PseudoProbeDescSection =
Ctx->getELFSection(".pseudo_probe_desc", DebugSecType, 0);
+
+ LLVMStatsSection = Ctx->getELFSection(".llvm_stats", ELF::SHT_PROGBITS, 0);
}
void MCObjectFileInfo::initGOFFMCObjectFileInfo(const Triple &T) {
return PseudoProbeDescSection;
}
+MCSection *MCObjectFileInfo::getLLVMStatsSection() const {
+ return LLVMStatsSection; // Section for LLVM statistics metadata; nullptr unless an init routine assigned it.
+}
+
MCSection *MCObjectFileInfo::getPCSection(StringRef Name,
const MCSection *TextSec) const {
if (Ctx->getObjectFileType() != MCContext::IsELF)
"report-profile-staleness", cl::Hidden, cl::init(false),
cl::desc("Compute and report stale profile statistical metrics."));
+static cl::opt<bool> PersistProfileStaleness(
+ "persist-profile-staleness", cl::Hidden, cl::init(false),
+ cl::desc("Compute stale profile statistical metrics and write it into the "
+ "native object file(.llvm_stats section)."));
+
static cl::opt<bool> ProfileSampleAccurate(
"profile-sample-accurate", cl::Hidden, cl::init(false),
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
}
}
- if (ReportProfileStaleness) {
+ if (ReportProfileStaleness || PersistProfileStaleness) {
MatchingManager =
std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
}
detectProfileMismatch(F, *FS);
}
- if (FunctionSamples::ProfileIsProbeBased) {
- errs() << "(" << NumMismatchedFuncHash << "/" << TotalProfiledFunc << ")"
- << " of functions' profile are invalid and "
- << " (" << MismatchedFuncHashSamples << "/" << TotalFuncHashSamples
+ if (ReportProfileStaleness) {
+ if (FunctionSamples::ProfileIsProbeBased) {
+ errs() << "(" << NumMismatchedFuncHash << "/" << TotalProfiledFunc << ")"
+ << " of functions' profile are invalid and "
+ << " (" << MismatchedFuncHashSamples << "/" << TotalFuncHashSamples
+ << ")"
+ << " of samples are discarded due to function hash mismatch.\n";
+ }
+ errs() << "(" << NumMismatchedCallsite << "/" << TotalProfiledCallsite
<< ")"
- << " of samples are discarded due to function hash mismatch.\n";
+ << " of callsites' profile are invalid and "
+ << "(" << MismatchedCallsiteSamples << "/" << TotalCallsiteSamples
+ << ")"
+ << " of samples are discarded due to callsite location mismatch.\n";
+ }
+
+ if (PersistProfileStaleness) {
+ LLVMContext &Ctx = M.getContext();
+ MDBuilder MDB(Ctx);
+
+ SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
+ if (FunctionSamples::ProfileIsProbeBased) {
+ ProfStatsVec.emplace_back("NumMismatchedFuncHash", NumMismatchedFuncHash);
+ ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
+ ProfStatsVec.emplace_back("MismatchedFuncHashSamples",
+ MismatchedFuncHashSamples);
+ ProfStatsVec.emplace_back("TotalFuncHashSamples", TotalFuncHashSamples);
+ }
+ ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
+ MismatchedCallsiteSamples);
+ ProfStatsVec.emplace_back("TotalCallsiteSamples", TotalCallsiteSamples);
+
+ auto *MD = MDB.createLLVMStats(ProfStatsVec);
+ auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
+ NMD->addOperand(MD);
}
- errs() << "(" << NumMismatchedCallsite << "/" << TotalProfiledCallsite << ")"
- << " of callsites' profile are invalid and "
- << "(" << MismatchedCallsiteSamples << "/" << TotalCallsiteSamples
- << ")"
- << " of samples are discarded due to callsite location mismatch.\n";
}
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
assert(SymbolMap.count(StringRef()) == 0 &&
"No empty StringRef should be added in SymbolMap");
- if (ReportProfileStaleness)
+ if (ReportProfileStaleness || PersistProfileStaleness)
MatchingManager->detectProfileMismatch();
bool retval = false;
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -S 2>%t
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll
; RUN: FileCheck %s --input-file %t
+; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
+; RUN: llc < %t.ll -filetype=obj -o %t.obj
+; RUN: llvm-objdump --section-headers %t.obj | FileCheck %s --check-prefix=CHECK-OBJ
; CHECK: (2/3) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
+; CHECK-MD: ![[#]] = !{!"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30}
+
+; CHECK-OBJ: .llvm_stats
+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch.prof -report-profile-staleness -S 2>%t
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll
; RUN: FileCheck %s --input-file %t
+; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
+; RUN: llc < %t.ll -filetype=obj -o %t.obj
+; RUN: llvm-objdump --section-headers %t.obj | FileCheck %s --check-prefix=CHECK-OBJ
; CHECK: (1/3) of functions' profile are invalid and (10/50) of samples are discarded due to function hash mismatch.
; CHECK: (2/3) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
+; CHECK-MD: ![[#]] = !{!"NumMismatchedFuncHash", i64 1, !"TotalProfiledFunc", i64 3, !"MismatchedFuncHashSamples", i64 10, !"TotalFuncHashSamples", i64 50, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30}
+
+; CHECK-OBJ: .llvm_stats
+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"