From 67cf01bd37273af5041ba07e2c57b55fcbf287cb Mon Sep 17 00:00:00 2001 From: Yi Kong Date: Fri, 19 May 2023 14:56:46 -0700 Subject: [PATCH] Reland^2 "[BOLT] Parallelize legacy profile merging" Resovled the issue that when number of tasks is fewer than cores, we end up creating as many threads as the number of cores, making the performance worse than the single thread version. --- bolt/tools/merge-fdata/merge-fdata.cpp | 73 ++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/bolt/tools/merge-fdata/merge-fdata.cpp b/bolt/tools/merge-fdata/merge-fdata.cpp index 61ca979..757f053 100644 --- a/bolt/tools/merge-fdata/merge-fdata.cpp +++ b/bolt/tools/merge-fdata/merge-fdata.cpp @@ -20,6 +20,9 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/ThreadPool.h" +#include +#include #include using namespace llvm; @@ -258,31 +261,40 @@ bool isYAML(const StringRef Filename) { void mergeLegacyProfiles(const SmallVectorImpl &Filenames) { errs() << "Using legacy profile format.\n"; std::optional BoltedCollection; - StringMap Entries; - for (const std::string &Filename : Filenames) { + std::mutex BoltedCollectionMutex; + typedef StringMap ProfileTy; + + auto ParseProfile = [&](const std::string &Filename, auto &Profiles) { + const llvm::thread::id tid = llvm::this_thread::get_id(); + if (isYAML(Filename)) report_error(Filename, "cannot mix YAML and legacy formats"); ErrorOr> MB = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = MB.getError()) report_error(Filename, EC); - errs() << "Merging data from " << Filename << "...\n"; StringRef Buf = MB.get()->getBuffer(); - // Check if the string "boltedcollection" is in the first line - if (Buf.startswith("boltedcollection\n")) { - if (!BoltedCollection.value_or(true)) - report_error( - Filename, - "cannot mix profile collected in BOLT and non-BOLT deployments"); - BoltedCollection = true; - Buf = Buf.drop_front(17); - } else { - if (BoltedCollection.value_or(false)) - report_error( - Filename, - "cannot mix profile collected in BOLT and non-BOLT deployments"); - BoltedCollection = false; + ProfileTy *Profile; + { + std::lock_guard Lock(BoltedCollectionMutex); + // Check if the string "boltedcollection" is in the first line + if (Buf.startswith("boltedcollection\n")) { + if (!BoltedCollection.value_or(true)) + report_error( + Filename, + "cannot mix profile collected in BOLT and non-BOLT deployments"); + BoltedCollection = true; + Buf = Buf.drop_front(17); + } else { + if (BoltedCollection.value_or(false)) + report_error( + Filename, + "cannot mix profile collected in BOLT and non-BOLT deployments"); + BoltedCollection = false; + } + + Profile = &Profiles[tid]; } SmallVector Lines; @@ -295,15 +307,32 @@ void mergeLegacyProfiles(const SmallVectorImpl &Filenames) { uint64_t Count; if (Line.substr(Pos + 1, Line.size() - Pos).getAsInteger(10, Count)) report_error(Filename, "Malformed / corrupted profile counter"); - Count += Entries.lookup(Signature); - Entries.insert_or_assign(Signature, Count); + Count += Profile->lookup(Signature); + Profile->insert_or_assign(Signature, Count); + } + }; + + // The final reduction has non-trivial cost, make sure each thread has at + // least 4 tasks. + ThreadPoolStrategy S = optimal_concurrency( + std::max(Filenames.size() / 4, static_cast(1))); + ThreadPool Pool(S); + DenseMap ParsedProfiles(Pool.getThreadCount()); + for (const auto &Filename : Filenames) + Pool.async(ParseProfile, std::cref(Filename), std::ref(ParsedProfiles)); + Pool.wait(); + + ProfileTy MergedProfile; + for (const auto &[Thread, Profile] : ParsedProfiles) + for (const auto &[Key, Value] : Profile) { + uint64_t Count = MergedProfile.lookup(Key) + Value; + MergedProfile.insert_or_assign(Key, Count); } - } if (BoltedCollection) output() << "boltedcollection\n"; - for (const auto &Entry : Entries) - output() << Entry.getKey() << " " << Entry.getValue() << "\n"; + for (const auto &[Key, Value] : MergedProfile) + output() << Key << " " << Value << "\n"; errs() << "Profile from " << Filenames.size() << " files merged.\n"; } -- 2.7.4