From 1ae7d83803e45f6053ec6a606f259653846926b8 Mon Sep 17 00:00:00 2001 From: Gulfem Savrun Yeniceri Date: Thu, 13 Oct 2022 00:50:10 +0000 Subject: [PATCH] [profile] Add binary ids into indexed profiles This patch adds support for including binary ids in an indexed profile. It adds a new field into the header that points to the offset of the binary id section. The binary id section consists of a size of the section, and a list of binary ids (if they are present) that consist of two parts: length and data. This patch guarantees that indexed profile is backwards compatible after adding binary ids. Differential Revision: https://reviews.llvm.org/D135929 --- compiler-rt/include/profile/InstrProfData.inc | 2 +- compiler-rt/test/profile/Linux/binary-id.c | 15 +- compiler-rt/test/profile/Linux/counter_promo_for.c | 2 +- .../test/profile/Linux/counter_promo_nest.c | 2 +- .../test/profile/Linux/counter_promo_while.c | 2 +- .../profile/Linux/instrprof-debug-info-correlate.c | 6 +- llvm/include/llvm/ProfileData/InstrProf.h | 5 +- llvm/include/llvm/ProfileData/InstrProfData.inc | 2 +- llvm/include/llvm/ProfileData/InstrProfReader.h | 21 ++- llvm/include/llvm/ProfileData/InstrProfWriter.h | 7 + llvm/lib/ProfileData/InstrProf.cpp | 9 +- llvm/lib/ProfileData/InstrProfReader.cpp | 184 +++++++++++++++------ llvm/lib/ProfileData/InstrProfWriter.cpp | 66 +++++++- llvm/tools/llvm-profdata/llvm-profdata.cpp | 9 +- 14 files changed, 259 insertions(+), 73 deletions(-) diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index 282620d..05419bf 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -650,7 +650,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 8 /* Indexed profile format version (start from 1). 
*/ -#define INSTR_PROF_INDEX_VERSION 8 +#define INSTR_PROF_INDEX_VERSION 9 /* Coverage mapping format version (start from 0). */ #define INSTR_PROF_COVMAP_VERSION 5 diff --git a/compiler-rt/test/profile/Linux/binary-id.c b/compiler-rt/test/profile/Linux/binary-id.c index 04f54b5..61b8ed9 100644 --- a/compiler-rt/test/profile/Linux/binary-id.c +++ b/compiler-rt/test/profile/Linux/binary-id.c @@ -1,13 +1,13 @@ // REQUIRES: linux // RUN: %clang_profgen -Wl,--build-id=none -O2 -o %t %s // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t -// RUN: llvm-profdata show --binary-ids %t.profraw > %t.out +// RUN: llvm-profdata show --binary-ids %t.profraw > %t.out // RUN: FileCheck %s --check-prefix=NO-BINARY-ID < %t.out // RUN: llvm-profdata merge -o %t.profdata %t.profraw // RUN: %clang_profgen -Wl,--build-id -O2 -o %t %s // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t -// RUN: llvm-profdata show --binary-ids %t.profraw > %t.profraw.out +// RUN: llvm-profdata show --binary-ids %t.profraw > %t.profraw.out // RUN: FileCheck %s --check-prefix=BINARY-ID-RAW-PROF < %t.profraw.out // RUN: rm -rf %t.profdir @@ -17,6 +17,10 @@ // RUN: llvm-profdata show --binary-ids %t.profdir/default_*.profraw > %t.profraw.out // RUN: FileCheck %s --check-prefix=BINARY-ID-MERGE-PROF < %t.profraw.out +// RUN: llvm-profdata merge -o %t.profdata %t.profraw %t.profraw +// RUN: llvm-profdata show --binary-ids %t.profdata > %t.profdata.out +// RUN: FileCheck %s --check-prefix=BINARY-ID-INDEXED-PROF < %t.profraw.out + void foo() { } @@ -48,3 +52,10 @@ int main() { // BINARY-ID-MERGE-PROF-NEXT: Maximum internal block count: 0 // BINARY-ID-MERGE-PROF-NEXT: Binary IDs: // BINARY-ID-MERGE-PROF-NEXT: {{[0-9a-f]+}} + +// BINARY-ID-INDEXED-PROF: Instrumentation level: Front-end +// BINARY-ID-INDEXED-PROF-NEXT: Total functions: 3 +// BINARY-ID-INDEXED-PROF-NEXT: Maximum function count: 3 +// BINARY-ID-INDEXED-PROF-NEXT: Maximum internal block count: 0 +// BINARY-ID-INDEXED-PROF-NEXT: Binary IDs: +// 
BINARY-ID-INDEXED-PROF-NEXT: {{[0-9a-f]+}} diff --git a/compiler-rt/test/profile/Linux/counter_promo_for.c b/compiler-rt/test/profile/Linux/counter_promo_for.c index 8699605..ffb107a 100644 --- a/compiler-rt/test/profile/Linux/counter_promo_for.c +++ b/compiler-rt/test/profile/Linux/counter_promo_for.c @@ -12,7 +12,7 @@ // RUN: %run %t.nopromo.gen // RUN: llvm-profdata merge -o %t.nopromo.profdata %t.nopromo.prof/ // RUN: llvm-profdata show --counts --all-functions %t.nopromo.profdata > %t.nopromo.dump -// RUN: diff %t.promo.profdata %t.nopromo.profdata +// RUN: diff <(llvm-profdata show %t.promo.profdata) <(llvm-profdata show %t.nopromo.profdata) int g; __attribute__((noinline)) void bar(int i) { g += i; } diff --git a/compiler-rt/test/profile/Linux/counter_promo_nest.c b/compiler-rt/test/profile/Linux/counter_promo_nest.c index ebd52dd..ac32d16 100644 --- a/compiler-rt/test/profile/Linux/counter_promo_nest.c +++ b/compiler-rt/test/profile/Linux/counter_promo_nest.c @@ -10,7 +10,7 @@ // RUN: %run %t.nopromo.gen // RUN: llvm-profdata merge -o %t.nopromo.profdata %t.nopromo.prof/ // RUN: llvm-profdata show --counts --all-functions %t.nopromo.profdata > %t.nopromo.dump -// RUN: diff %t.promo.profdata %t.nopromo.profdata +// RUN: diff <(llvm-profdata show %t.promo.profdata) <(llvm-profdata show %t.nopromo.profdata) int g; __attribute__((noinline)) void bar() { g++; diff --git a/compiler-rt/test/profile/Linux/counter_promo_while.c b/compiler-rt/test/profile/Linux/counter_promo_while.c index 8a186f4..3fb6561 100644 --- a/compiler-rt/test/profile/Linux/counter_promo_while.c +++ b/compiler-rt/test/profile/Linux/counter_promo_while.c @@ -12,7 +12,7 @@ // RUN: %run %t.nopromo.gen // RUN: llvm-profdata merge -o %t.nopromo.profdata %t.nopromo.prof/ // RUN: llvm-profdata show --counts --all-functions %t.nopromo.profdata > %t.nopromo.dump -// RUN: diff %t.promo.profdata %t.nopromo.profdata +// RUN: diff <(llvm-profdata show %t.promo.profdata) <(llvm-profdata show 
%t.nopromo.profdata) int g; __attribute__((noinline)) void bar(int i) { g += i; } __attribute__((noinline)) void foo(int n, int N) { diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c index bd50234..e40d1be 100644 --- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c +++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c @@ -7,13 +7,13 @@ // RUN: env LLVM_PROFILE_FILE=%t.d4.proflite %run %t.d4 // RUN: llvm-profdata merge -o %t.d4.profdata --debug-info=%t.d4 %t.d4.proflite -// RUN: diff %t.normal.profdata %t.d4.profdata +// RUN: diff <(llvm-profdata show %t.normal.profdata) <(llvm-profdata show %t.d4.profdata) // RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite -// RUN: diff %t.normal.profdata %t.profdata +// RUN: diff <(llvm-profdata show %t.normal.profdata) <(llvm-profdata show %t.profdata) // RUN: %clang_pgogen -o %t.cov -g -mllvm --debug-info-correlate -mllvm -pgo-function-entry-coverage -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.cov.proflite %run %t.cov @@ -23,4 +23,4 @@ // RUN: env LLVM_PROFILE_FILE=%t.cov.profraw %run %t.cov.normal // RUN: llvm-profdata merge -o %t.cov.normal.profdata %t.cov.profraw -// RUN: diff %t.cov.normal.profdata %t.cov.profdata +// RUN: diff <(llvm-profdata show %t.cov.normal.profdata) <(llvm-profdata show %t.cov.profdata) diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 0d788a4..eba7885 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ 
b/llvm/include/llvm/ProfileData/InstrProf.h @@ -1050,7 +1050,9 @@ enum ProfVersion { Version7 = 7, // An additional (optional) memory profile type is added. Version8 = 8, - // The current version is 8. + // Binary ids are added. + Version9 = 9, + // The current version is 9. CurrentVersion = INSTR_PROF_INDEX_VERSION }; const uint64_t Version = ProfVersion::CurrentVersion; @@ -1068,6 +1070,7 @@ struct Header { uint64_t HashType; uint64_t HashOffset; uint64_t MemProfOffset; + uint64_t BinaryIdOffset; // New fields should only be added at the end to ensure that the size // computation is correct. The methods below need to be updated to ensure that // the new field is read correctly. diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index 282620d..05419bf 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -650,7 +650,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 8 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 8 +#define INSTR_PROF_INDEX_VERSION 9 /* Coverage mapping format version (start from 0). 
*/ #define INSTR_PROF_COVMAP_VERSION 5 diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index ad7ced9..8beba94 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -17,12 +17,14 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" +#include "llvm/Object/BuildID.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfCorrelator.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/OnDiskHashTable.h" #include "llvm/Support/SwapByteOrder.h" @@ -96,7 +98,12 @@ public: /// Read a single record. virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; - /// Print binary ids on stream OS. + /// Read a list of binary ids. + virtual Error readBinaryIds(std::vector &BinaryIds) { + return success(); + } + + /// Print binary ids. virtual Error printBinaryIds(raw_ostream &OS) { return success(); }; /// Iterator over profile data. @@ -295,7 +302,9 @@ private: uint32_t ValueKindLast; uint32_t CurValueDataSize; - uint64_t BinaryIdsSize; + /// Total size of binary ids. + uint64_t BinaryIdsSize{0}; + /// Start address of binary id length and data pairs. const uint8_t *BinaryIdsStart; public: @@ -310,6 +319,7 @@ public: static bool hasFormat(const MemoryBuffer &DataBuffer); Error readHeader() override; Error readNextRecord(NamedInstrProfRecord &Record) override; + Error readBinaryIds(std::vector &BinaryIds) override; Error printBinaryIds(raw_ostream &OS) override; uint64_t getVersion() const override { return Version; } @@ -596,6 +606,10 @@ private: std::unique_ptr MemProfRecordTable; /// MemProf frame profile data on-disk indexed via frame id. 
std::unique_ptr MemProfFrameTable; + /// Total size of binary ids. + uint64_t BinaryIdsSize{0}; + /// Start address of binary id length and data pairs. + const uint8_t *BinaryIdsStart; // Index to the current record in the record array. unsigned RecordIndex; @@ -706,6 +720,9 @@ public: return *Summary; } } + + Error readBinaryIds(std::vector &BinaryIds) override; + Error printBinaryIds(raw_ostream &OS) override; }; } // end namespace llvm diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index 29e0796..087f229 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -18,6 +18,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/Object/BuildID.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Endian.h" @@ -50,6 +51,9 @@ private: // inline. llvm::MapVector MemProfFrameData; + // List of binary ids. + std::vector BinaryIds; + // An enum describing the attributes of the profile. InstrProfKind ProfileKind = InstrProfKind::Unknown; // Use raw pointer here for the incomplete type object. @@ -79,6 +83,9 @@ public: bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F, function_ref Warn); + // Add a binary id to the binary ids list. + void addBinaryIds(ArrayRef BIs); + /// Merge existing function counts from the given writer. void mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref Warn); diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index fec25a8..7f415df 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1372,9 +1372,12 @@ Expected
Header::readFromBuffer(const unsigned char *Buffer) { // When a new field is added in the header add a case statement here to // populate it. static_assert( - IndexedInstrProf::ProfVersion::CurrentVersion == Version8, + IndexedInstrProf::ProfVersion::CurrentVersion == Version9, "Please update the reading code below if a new field has been added, " "if not add a case statement to fall through to the latest version."); + case 9ull: + H.BinaryIdOffset = read(Buffer, offsetOf(&Header::BinaryIdOffset)); + [[fallthrough]]; case 8ull: H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset)); [[fallthrough]]; @@ -1391,10 +1394,12 @@ size_t Header::size() const { // When a new field is added to the header add a case statement here to // compute the size as offset of the new field + size of the new field. This // relies on the field being added to the end of the list. - static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version8, + static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version9, "Please update the size computation below if a new field has " "been added to the header, if not add a case statement to " "fall through to the latest version."); + case 9ull: + return offsetOf(&Header::BinaryIdOffset) + sizeof(Header::BinaryIdOffset); case 8ull: return offsetOf(&Header::MemProfOffset) + sizeof(Header::MemProfOffset); default: // Version7 (when the backwards compatible header was introduced). diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 26d23ae..4dfc3ba 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -75,6 +75,91 @@ static Error initializeReader(InstrProfReader &Reader) { return Reader.readHeader(); } +/// Read a list of binary ids from a profile that consist of +/// a. uint64_t binary id length +/// b. uint8_t binary id data +/// c. uint8_t padding (if necessary) +/// This function is shared between raw and indexed profiles. 
+/// Raw profiles are in host-endian format, and indexed profiles are in +/// little-endian format. So, this function takes an argument indicating the +/// associated endian format to read the binary ids correctly. +static Error +readBinaryIdsInternal(const MemoryBuffer &DataBuffer, + const uint64_t BinaryIdsSize, + const uint8_t *BinaryIdsStart, + std::vector &BinaryIds, + const llvm::support::endianness Endian) { + using namespace support; + + if (BinaryIdsSize == 0) + return Error::success(); + + const uint8_t *BI = BinaryIdsStart; + const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; + const uint8_t *End = + reinterpret_cast(DataBuffer.getBufferEnd()); + + while (BI < BIEnd) { + size_t Remaining = BIEnd - BI; + // There should be enough left to read the binary id length. + if (Remaining < sizeof(uint64_t)) + return make_error( + instrprof_error::malformed, + "not enough data to read binary id length"); + + uint64_t BILen = 0; + if (Endian == little) + BILen = endian::readNext(BI); + else + BILen = endian::readNext(BI); + + if (BILen == 0) + return make_error(instrprof_error::malformed, + "binary id length is 0"); + + Remaining = BIEnd - BI; + // There should be enough left to read the binary id data. + if (Remaining < alignToPowerOf2(BILen, sizeof(uint64_t))) + return make_error( + instrprof_error::malformed, "not enough data to read binary id data"); + + // Add binary id to the binary ids list. + BinaryIds.push_back(object::BuildID(BI, BI + BILen)); + + // Increment by binary id data length, aligned to the size of uint64. 
+ BI += alignToPowerOf2(BILen, sizeof(uint64_t)); + if (BI > End) + return make_error( + instrprof_error::malformed, + "binary id section is greater than buffer size"); + } + + return Error::success(); +} + +static Error printBinaryIdsInternal(raw_ostream &OS, + const MemoryBuffer &DataBuffer, + uint64_t BinaryIdsSize, + const uint8_t *BinaryIdsStart, + llvm::support::endianness Endian) { + if (BinaryIdsSize == 0) + return Error::success(); + + std::vector BinaryIds; + if (Error E = readBinaryIdsInternal(DataBuffer, BinaryIdsSize, BinaryIdsStart, + BinaryIds, Endian)) + return E; + + OS << "Binary IDs: \n"; + for (auto BI : BinaryIds) { + for (uint64_t I = 0; I < BI.size(); I++) + OS << format("%02x", BI[I]); + OS << "\n"; + } + + return Error::success(); +} + Expected> InstrProfReader::create(const Twine &Path, const InstrProfCorrelator *Correlator) { @@ -573,54 +658,18 @@ Error RawInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) return success(); } -static size_t RoundUp(size_t size, size_t align) { - return (size + align - 1) & ~(align - 1); +template +Error RawInstrProfReader::readBinaryIds( + std::vector &BinaryIds) { + return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart, + BinaryIds, + llvm::support::endian::system_endianness()); } template Error RawInstrProfReader::printBinaryIds(raw_ostream &OS) { - if (BinaryIdsSize == 0) - return success(); - - OS << "Binary IDs: \n"; - const uint8_t *BI = BinaryIdsStart; - const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; - while (BI < BIEnd) { - size_t Remaining = BIEnd - BI; - - // There should be enough left to read the binary ID size field. - if (Remaining < sizeof(uint64_t)) - return make_error( - instrprof_error::malformed, - "not enough data to read binary id length"); - - uint64_t BinaryIdLen = swap(*reinterpret_cast(BI)); - - // There should be enough left to read the binary ID size field, and the - // binary ID. 
- if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen) - return make_error( - instrprof_error::malformed, "not enough data to read binary id data"); - - // Increment by binary id length data type size. - BI += sizeof(BinaryIdLen); - if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) - return make_error( - instrprof_error::malformed, - "binary id that is read is bigger than buffer size"); - - for (uint64_t I = 0; I < BinaryIdLen; I++) - OS << format("%02x", BI[I]); - OS << "\n"; - - // Increment by binary id data length, rounded to the next 8 bytes. This - // accounts for the zero-padding after each build ID. - BI += RoundUp(BinaryIdLen, sizeof(uint64_t)); - if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) - return make_error(instrprof_error::malformed); - } - - return success(); + return printBinaryIdsInternal(OS, *DataBuffer, BinaryIdsSize, BinaryIdsStart, + llvm::support::endian::system_endianness()); } namespace llvm { @@ -948,9 +997,9 @@ Error IndexedInstrProfReader::readHeader() { Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, /* UseCS */ false); if (Header->formatVersion() & VARIANT_MASK_CSIR_PROF) - Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, - /* UseCS */ true); - + Cur = + readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, + /* UseCS */ true); // Read the hash type and start offset. IndexedInstrProf::HashT HashType = static_cast( endian::byte_swap(Header->HashType)); @@ -963,8 +1012,8 @@ Error IndexedInstrProfReader::readHeader() { auto IndexPtr = std::make_unique>( Start + HashOffset, Cur, Start, HashType, Header->formatVersion()); - // The MemProfOffset field in the header is only valid when the format version - // is higher than 8 (when it was introduced). + // The MemProfOffset field in the header is only valid when the format + // version is 8 or higher (when it was introduced). 
if (GET_VERSION(Header->formatVersion()) >= 8 && Header->formatVersion() & VARIANT_MASK_MEMPROF) { uint64_t MemProfOffset = @@ -974,7 +1023,8 @@ Error IndexedInstrProfReader::readHeader() { // The value returned from RecordTableGenerator.Emit. const uint64_t RecordTableOffset = support::endian::readNext(Ptr); - // The offset in the stream right before invoking FrameTableGenerator.Emit. + // The offset in the stream right before invoking + // FrameTableGenerator.Emit. const uint64_t FramePayloadOffset = support::endian::readNext(Ptr); // The value returned from FrameTableGenerator.Emit. @@ -1000,11 +1050,28 @@ Error IndexedInstrProfReader::readHeader() { /*Base=*/Start, memprof::FrameLookupTrait())); } + // BinaryIdOffset field in the header is only valid when the format version + // is 9 or higher (when it was introduced). + if (GET_VERSION(Header->formatVersion()) >= 9) { + uint64_t BinaryIdOffset = + endian::byte_swap(Header->BinaryIdOffset); + const unsigned char *Ptr = Start + BinaryIdOffset; + // Read binary ids size. + BinaryIdsSize = support::endian::readNext(Ptr); + if (BinaryIdsSize % sizeof(uint64_t)) + return error(instrprof_error::bad_header); + // Set the binary ids start. + BinaryIdsStart = Ptr; + if (BinaryIdsStart > (const unsigned char *)DataBuffer->getBufferEnd()) + return make_error(instrprof_error::malformed, + "corrupted binary ids"); + } + // Load the remapping table now if requested. 
if (RemappingBuffer) { - Remapper = std::make_unique< - InstrProfReaderItaniumRemapper>( - std::move(RemappingBuffer), *IndexPtr); + Remapper = + std::make_unique>( + std::move(RemappingBuffer), *IndexPtr); if (Error E = Remapper->populateRemappings()) return E; } else { @@ -1136,6 +1203,17 @@ Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { return success(); } +Error IndexedInstrProfReader::readBinaryIds( + std::vector &BinaryIds) { + return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart, + BinaryIds, llvm::support::little); +} + +Error IndexedInstrProfReader::printBinaryIds(raw_ostream &OS) { + return printBinaryIdsInternal(OS, *DataBuffer, BinaryIdsSize, BinaryIdsStart, + llvm::support::little); +} + void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { uint64_t NumFuncs = 0; for (const auto &Func : *this) { diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index bbea275..af3c27e 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -54,6 +54,7 @@ public: uint64_t tell() { return OS.tell(); } void write(uint64_t V) { LE.write(V); } + void writeByte(uint8_t V) { LE.write(V); } // \c patch can only be called when all data is written and flushed. 
// For raw_string_ostream, the patch is done on the target string @@ -280,12 +281,20 @@ bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id, return true; } +void InstrProfWriter::addBinaryIds(ArrayRef BIs) { + llvm::append_range(BinaryIds, BIs); +} + void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref Warn) { for (auto &I : IPW.FunctionData) for (auto &Func : I.getValue()) addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); + BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size()); + for (auto &I : IPW.BinaryIds) + addBinaryIds(I); + MemProfFrameData.reserve(IPW.MemProfFrameData.size()); for (auto &I : IPW.MemProfFrameData) { // If we weren't able to add the frame mappings then it doesn't make sense @@ -330,6 +339,7 @@ static void setSummary(IndexedInstrProf::Summary *TheSummary, Error InstrProfWriter::writeImpl(ProfOStream &OS) { using namespace IndexedInstrProf; + using namespace support; OnDiskChainedHashTableGenerator Generator; @@ -365,11 +375,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { Header.HashType = static_cast(IndexedInstrProf::HashType); Header.HashOffset = 0; Header.MemProfOffset = 0; + Header.BinaryIdOffset = 0; int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); - // Only write out all the fields except 'HashOffset' and 'MemProfOffset'. We - // need to remember the offset of these fields to allow back patching later. - for (int I = 0; I < N - 2; I++) + // Only write out all the fields except 'HashOffset', 'MemProfOffset' and + // 'BinaryIdOffset'. We need to remember the offset of these fields to allow + // back patching later. + for (int I = 0; I < N - 3; I++) OS.write(reinterpret_cast(&Header)[I]); // Save the location of Header.HashOffset field in \c OS. @@ -384,6 +396,12 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { // profile contains memory profile information. OS.write(0); + // Save the location of Header.BinaryIdOffset field in \c OS. 
+ uint64_t BinaryIdSectionOffset = OS.tell(); + // Reserve space for the BinaryIdOffset field to be patched later if this + // profile contains binary ids. + OS.write(0); + // Reserve space to write profile summary data. uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries); @@ -460,6 +478,43 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { OS.patch(PatchItems, 3); } + // BinaryIdSection has two parts: + // 1. uint64_t BinaryIdsSectionSize + // 2. list of binary ids that consist of: + // a. uint64_t BinaryIdLength + // b. uint8_t BinaryIdData + // c. uint8_t Padding (if necessary) + uint64_t BinaryIdSectionStart = OS.tell(); + // Calculate size of binary section. + uint64_t BinaryIdsSectionSize = 0; + + // Remove duplicate binary ids. + llvm::sort(BinaryIds); + BinaryIds.erase(std::unique(BinaryIds.begin(), BinaryIds.end()), + BinaryIds.end()); + + for (auto BI : BinaryIds) { + // Increment by binary id length data type size. + BinaryIdsSectionSize += sizeof(uint64_t); + // Increment by binary id data length, aligned to 8 bytes. + BinaryIdsSectionSize += alignToPowerOf2(BI.size(), sizeof(uint64_t)); + } + // Write binary ids section size. + OS.write(BinaryIdsSectionSize); + + for (auto BI : BinaryIds) { + uint64_t BILen = BI.size(); + // Write binary id length. + OS.write(BILen); + // Write binary id data. + for (unsigned K = 0; K < BILen; K++) + OS.writeByte(BI[K]); + // Write padding if necessary. + uint64_t PaddingSize = alignToPowerOf2(BILen, sizeof(uint64_t)) - BILen; + for (unsigned K = 0; K < PaddingSize; K++) + OS.writeByte(0); + } + // Allocate space for data to be serialized out. std::unique_ptr TheSummary = IndexedInstrProf::allocSummary(SummarySize); @@ -482,8 +537,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { PatchItem PatchItems[] = { // Patch the Header.HashOffset field. 
{HashTableStartFieldOffset, &HashTableStart, 1}, - // Patch the Header.MemProfOffset (=0 for profiles without MemProf data). + // Patch the Header.MemProfOffset (=0 for profiles without MemProf + // data). {MemProfSectionOffset, &MemProfSectionStart, 1}, + // Patch the Header.BinaryIdOffset. + {BinaryIdSectionOffset, &BinaryIdSectionStart, 1}, // Patch the summary data. {SummaryOffset, reinterpret_cast(TheSummary.get()), (int)(SummarySize / sizeof(uint64_t))}, diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 76b745b..90c9e56 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -338,9 +338,16 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, FuncName, firstTime); }); } - if (Reader->hasError()) + + if (Reader->hasError()) { if (Error E = Reader->getError()) WC->Errors.emplace_back(std::move(E), Filename); + } + + std::vector BinaryIds; + if (Error E = Reader->readBinaryIds(BinaryIds)) + WC->Errors.emplace_back(std::move(E), Filename); + WC->Writer.addBinaryIds(BinaryIds); } /// Merge the \p Src writer context into \p Dst. -- 2.7.4