bad_magic,
bad_header,
unsupported_version,
+ unsupported_hash_type,
too_large,
truncated,
malformed,
#define LLVM_PROFILEDATA_INSTRPROF_READER_H_
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/OnDiskHashTable.h"
#include <iterator>
/// Profiling information for a single function.
struct InstrProfRecord {
+ InstrProfRecord() {}
+ InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t> Counts)
+ : Name(Name), Hash(Hash), Counts(Counts) {}
StringRef Name;
uint64_t Hash;
ArrayRef<uint64_t> Counts;
typedef RawInstrProfReader<uint32_t> RawInstrProfReader32;
typedef RawInstrProfReader<uint64_t> RawInstrProfReader64;
+namespace IndexedInstrProf {
+enum class HashT : uint32_t;
+uint64_t ComputeHash(HashT Type, StringRef K);
+}
+
+/// Trait for lookups into the on-disk hash table for the binary instrprof
+/// format.
+///
+/// Keys are function names. Each data payload is a function hash followed by
+/// that function's counters, all stored as little-endian 64-bit values. The
+/// Counts of a record returned by ReadData refer to CountBuffer, which the
+/// next ReadData call overwrites.
+class InstrProfLookupTrait {
+  /// Storage backing the Counts of the most recently read record.
+  std::vector<uint64_t> CountBuffer;
+  /// Hash function applied to keys.
+  IndexedInstrProf::HashT HashType;
+public:
+  InstrProfLookupTrait(IndexedInstrProf::HashT HashType) : HashType(HashType) {}
+
+  typedef InstrProfRecord data_type;
+  typedef StringRef internal_key_type;
+  typedef StringRef external_key_type;
+  typedef uint64_t hash_value_type;
+  typedef uint64_t offset_type;
+
+  static bool EqualKey(StringRef A, StringRef B) { return A == B; }
+  static StringRef GetInternalKey(StringRef K) { return K; }
+
+  /// Hash \p K with the hash function this trait was constructed with.
+  hash_value_type ComputeHash(StringRef K) {
+    return IndexedInstrProf::ComputeHash(HashType, K);
+  }
+
+  /// Read the key length followed by the data length, advancing \p D past
+  /// both values.
+  static std::pair<offset_type, offset_type>
+  ReadKeyDataLength(const unsigned char *&D) {
+    using namespace support;
+    // Both reads advance D, so they must be sequenced as separate statements:
+    // the evaluation order of function arguments is unspecified and could
+    // otherwise swap the two lengths.
+    offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
+    offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
+    return std::make_pair(KeyLen, DataLen);
+  }
+
+  /// View the \p N bytes at \p D as the key string.
+  StringRef ReadKey(const unsigned char *D, unsigned N) {
+    return StringRef((const char *)D, N);
+  }
+
+  /// Decode the \p N byte payload at \p D into a record named \p K. A corrupt
+  /// payload yields a record with an empty name and no counts.
+  InstrProfRecord ReadData(StringRef K, const unsigned char *D, unsigned N) {
+    // A valid payload holds the hash plus at least one counter, and is a
+    // whole number of 64-bit words.
+    if (N < 2 * sizeof(uint64_t) || N % sizeof(uint64_t)) {
+      // The data is corrupt, don't try to read it.
+      CountBuffer.clear();
+      return InstrProfRecord("", 0, CountBuffer);
+    }
+
+    using namespace support;
+
+    // The first stored value is the hash.
+    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
+    // Each counter follows.
+    unsigned NumCounters = N / sizeof(uint64_t) - 1;
+    CountBuffer.clear();
+    // Reserve room for every counter that is about to be appended.
+    CountBuffer.reserve(NumCounters);
+    for (unsigned I = 0; I < NumCounters; ++I)
+      CountBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
+
+    return InstrProfRecord(K, Hash, CountBuffer);
+  }
+};
+typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
+ InstrProfReaderIndex;
+
+/// Reader for the indexed binary instrprof format.
+///
+/// Records can be visited in sequence with readNextRecord or looked up by
+/// function name with getFunctionCounts. readHeader must have run before
+/// either is used, since it is what creates the index.
+class IndexedInstrProfReader : public InstrProfReader {
+private:
+  /// The profile data file contents.
+  std::unique_ptr<MemoryBuffer> DataBuffer;
+  /// The index into the profile data.
+  std::unique_ptr<InstrProfReaderIndex> Index;
+  /// Iterator over the profile data.
+  InstrProfReaderIndex::data_iterator RecordIterator;
+  /// The maximal execution count among all functions.
+  uint64_t MaxFunctionCount;
+
+  IndexedInstrProfReader(const IndexedInstrProfReader &) LLVM_DELETED_FUNCTION;
+  IndexedInstrProfReader &operator=(const IndexedInstrProfReader &)
+      LLVM_DELETED_FUNCTION;
+public:
+  /// Take ownership of \p DataBuffer. MaxFunctionCount starts at zero so that
+  /// getMaximumFunctionCount is well defined even before readHeader fills it
+  /// in from the file.
+  IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
+      : DataBuffer(std::move(DataBuffer)), Index(nullptr),
+        RecordIterator(InstrProfReaderIndex::data_iterator()),
+        MaxFunctionCount(0) {}
+
+  /// Return true if the given buffer is in an indexed instrprof format.
+  static bool hasFormat(const MemoryBuffer &DataBuffer);
+
+  /// Read the file header.
+  error_code readHeader() override;
+  /// Read a single record.
+  error_code readNextRecord(InstrProfRecord &Record) override;
+
+  /// Fill Counts with the profile data for the given function name.
+  error_code getFunctionCounts(StringRef FuncName, uint64_t &FuncHash,
+                               std::vector<uint64_t> &Counts);
+  /// Return the maximum of all known function counts.
+  uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
+
+  /// Factory method to create an indexed reader.
+  static error_code create(std::string Path,
+                           std::unique_ptr<IndexedInstrProfReader> &Result);
+};
+
} // end namespace llvm
#endif // LLVM_PROFILEDATA_INSTRPROF_READER_H_
error_code addFunctionCounts(StringRef FunctionName, uint64_t FunctionHash,
ArrayRef<uint64_t> Counters);
/// Ensure that all data is written to disk.
- void write(raw_ostream &OS);
+ void write(raw_fd_ostream &OS);
};
} // end namespace llvm
return "Invalid header";
case instrprof_error::unsupported_version:
return "Unsupported format version";
+ case instrprof_error::unsupported_hash_type:
+ return "Unsupported hash function";
case instrprof_error::too_large:
return "Too much profile data";
case instrprof_error::truncated:
--- /dev/null
+//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Shared header for the instrumented profile data reader and writer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+#define LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+
+#include "llvm/Support/MD5.h"
+
+namespace llvm {
+
+namespace IndexedInstrProf {
+/// Identifies the hash function applied to the keys of an indexed profile.
+enum class HashT : uint32_t {
+ MD5,
+
+ // Highest valid enumerator; the reader rejects hash types greater than this.
+ Last = MD5
+};
+
+/// Hash \p Str with MD5 and return 64 bits of the resulting digest.
+static inline uint64_t MD5Hash(StringRef Str) {
+  llvm::MD5::MD5Result Digest;
+  MD5 Hasher;
+  Hasher.update(Str);
+  Hasher.final(Digest);
+  // Keep only the least significant 8 bytes. The MD5 implementation hands
+  // back its result in little endian, so the read may have to swap bytes.
+  return llvm::support::endian::read<uint64_t, llvm::support::little,
+                                     llvm::support::unaligned>(Digest);
+}
+
+/// Compute the hash of \p K using the hash function selected by \p Type.
+///
+/// This is declared inline because the header is included by more than one
+/// source file; a non-inline definition here would be emitted by each of them
+/// and clash at link time.
+inline uint64_t ComputeHash(HashT Type, StringRef K) {
+  switch (Type) {
+  case HashT::MD5:
+    return IndexedInstrProf::MD5Hash(K);
+  }
+  llvm_unreachable("Unhandled hash type");
+}
+
+// Magic number stored in the first eight bytes of an indexed profile.
+const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
+// Format version emitted by the writer; the reader rejects any other value.
+const uint64_t Version = 1;
+// Hash function the writer uses for keys and records in the file header.
+const HashT HashType = HashT::MD5;
+}
+
+} // end namespace llvm
+
+#endif // LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "InstrProfIndexed.h"
+
#include <cassert>
using namespace llvm;
-error_code InstrProfReader::create(std::string Path,
- std::unique_ptr<InstrProfReader> &Result) {
- std::unique_ptr<MemoryBuffer> Buffer;
+/// Load \p Path into \p Buffer via MemoryBuffer::getFileOrSTDIN.
+///
+/// Returns the loader's error if it fails, instrprof_error::too_large when
+/// the buffer size does not fit in an unsigned, and success otherwise.
+static error_code setupMemoryBuffer(std::string Path,
+ std::unique_ptr<MemoryBuffer> &Buffer) {
if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer))
return EC;
// Sanity check the file.
if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
return instrprof_error::too_large;
+ return instrprof_error::success;
+}
+
+/// Initialize \p Reader by having it read its header; the status of that read
+/// is the status of the initialization.
+static error_code initializeReader(InstrProfReader &Reader) {
+  error_code HeaderStatus = Reader.readHeader();
+  return HeaderStatus;
+}
+
+/// Load \p Path and store in \p Result a reader matching the detected format.
+///
+/// Formats are probed in order: indexed, 64-bit raw, 32-bit raw. A buffer
+/// that matches none of them is handed to the text reader. The returned
+/// status is that of loading the buffer or, once a reader exists, of
+/// initializing it.
+error_code InstrProfReader::create(std::string Path,
+ std::unique_ptr<InstrProfReader> &Result) {
+ // Set up the buffer to read.
+ std::unique_ptr<MemoryBuffer> Buffer;
+ if (error_code EC = setupMemoryBuffer(Path, Buffer))
+ return EC;
// Create the reader.
- if (RawInstrProfReader64::hasFormat(*Buffer))
+ if (IndexedInstrProfReader::hasFormat(*Buffer))
+ Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
+ else if (RawInstrProfReader64::hasFormat(*Buffer))
Result.reset(new RawInstrProfReader64(std::move(Buffer)));
else if (RawInstrProfReader32::hasFormat(*Buffer))
Result.reset(new RawInstrProfReader32(std::move(Buffer)));
else
Result.reset(new TextInstrProfReader(std::move(Buffer)));
- // Read the header and return the result.
- return Result->readHeader();
+ // Initialize the reader and return the result.
+ return initializeReader(*Result);
+}
+
+/// Load \p Path as an indexed profile and store an initialized reader for it
+/// in \p Result. Buffers without the indexed magic number are rejected with
+/// instrprof_error::bad_magic.
+error_code IndexedInstrProfReader::create(
+    std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) {
+  // Load the contents to read from.
+  std::unique_ptr<MemoryBuffer> Contents;
+  error_code LoadStatus = setupMemoryBuffer(Path, Contents);
+  if (LoadStatus)
+    return LoadStatus;
+
+  // Only build a reader for buffers that look like the indexed format.
+  const bool IsIndexed = IndexedInstrProfReader::hasFormat(*Contents);
+  if (!IsIndexed)
+    return instrprof_error::bad_magic;
+  Result.reset(new IndexedInstrProfReader(std::move(Contents)));
+
+  // The status of the initialization is the status of the whole operation.
+  return initializeReader(*Result);
}
void InstrProfIterator::Increment() {
template class RawInstrProfReader<uint32_t>;
template class RawInstrProfReader<uint64_t>;
}
+
+/// Report whether \p DataBuffer starts with the indexed profile magic number.
+bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
+  using namespace support;
+  // Anything shorter than the 8-byte magic number cannot match.
+  if (DataBuffer.getBufferSize() >= 8) {
+    const uint64_t Leading =
+        endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
+    return Leading == IndexedInstrProf::Magic;
+  }
+  return false;
+}
+
+/// Parse and validate the file header, then set up the hash table index and
+/// the iterator used by readNextRecord.
+///
+/// The header consists of five little-endian 64-bit fields: magic number,
+/// format version, maximal function count, hash type and hash table offset.
+/// Returns truncated, bad_magic, unsupported_version, unsupported_hash_type
+/// or malformed when the corresponding check fails.
+error_code IndexedInstrProfReader::readHeader() {
+  const unsigned char *Start =
+      reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
+  const unsigned char *End =
+      reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
+  const unsigned char *Cur = Start;
+
+  // All five fields are read below, so the whole header has to be present
+  // before any of it is touched.
+  const uint64_t BufferSize = static_cast<uint64_t>(End - Start);
+  const uint64_t HeaderSize = 5 * sizeof(uint64_t);
+  if (BufferSize < HeaderSize)
+    return error(instrprof_error::truncated);
+
+  using namespace support;
+
+  // Check the magic number.
+  uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (Magic != IndexedInstrProf::Magic)
+    return error(instrprof_error::bad_magic);
+
+  // Read the version.
+  uint64_t Version = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (Version != IndexedInstrProf::Version)
+    return error(instrprof_error::unsupported_version);
+
+  // Read the maximal function count.
+  MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);
+
+  // Read the hash type. The stored value is 64 bits wide while the enum is 32
+  // bits wide, so range check it before narrowing; otherwise an out-of-range
+  // value could be truncated into a valid one.
+  uint64_t RawHashType = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (RawHashType > static_cast<uint64_t>(IndexedInstrProf::HashT::Last))
+    return error(instrprof_error::unsupported_hash_type);
+  IndexedInstrProf::HashT HashType =
+      static_cast<IndexedInstrProf::HashT>(RawHashType);
+
+  // Read the hash table offset. It is about to be turned into a pointer, so
+  // it has to land after the header and inside the buffer.
+  uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (HashOffset < HeaderSize || HashOffset >= BufferSize)
+    return error(instrprof_error::malformed);
+
+  // The rest of the file is an on disk hash table.
+  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start,
+                                           InstrProfLookupTrait(HashType)));
+  // Set up our iterator for readNextRecord.
+  RecordIterator = Index->data_begin();
+
+  return success();
+}
+
+/// Look up \p FuncName in the index and, when a usable record is found, copy
+/// its hash into \p FuncHash and its counters into \p Counts.
+///
+/// Returns unknown_function when the name is not in the index and malformed
+/// when the stored record has an empty name.
+error_code IndexedInstrProfReader::getFunctionCounts(
+    StringRef FuncName, uint64_t &FuncHash, std::vector<uint64_t> &Counts) {
+  const auto &Found = Index->find(FuncName);
+  const bool Missing = Found == Index->end();
+  if (Missing)
+    return error(instrprof_error::unknown_function);
+
+  // The lookup trait signals a corrupt payload with an empty name, so check
+  // for that before handing anything back.
+  const InstrProfRecord &Entry = *Found;
+  const bool Corrupt = Entry.Name.empty();
+  if (Corrupt)
+    return error(instrprof_error::malformed);
+
+  FuncHash = Entry.Hash;
+  Counts = Entry.Counts;
+  return success();
+}
+
+/// Copy the record under the iterator into \p Record and step the iterator.
+///
+/// Returns eof once the iterator has reached the end of the index and
+/// malformed when the record that was read has an empty name.
+error_code IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
+  const bool Exhausted = RecordIterator == Index->data_end();
+  if (Exhausted)
+    return error(instrprof_error::eof);
+
+  // The iterator moves on even when the record turns out to be unusable.
+  Record = *RecordIterator;
+  ++RecordIterator;
+
+  const bool Usable = !Record.Name.empty();
+  if (Usable)
+    return success();
+  return error(instrprof_error::malformed);
+}
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/InstrProfWriter.h"
-#include "llvm/Support/Endian.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/OnDiskHashTable.h"
+
+#include "InstrProfIndexed.h"
using namespace llvm;
+namespace {
+/// Writer-side trait describing how function records are hashed and
+/// serialized into the on-disk hash table: the key is the function name and
+/// the data is the function hash followed by its counters.
+class InstrProfRecordTrait {
+public:
+  typedef StringRef key_type;
+  typedef StringRef key_type_ref;
+
+  typedef InstrProfWriter::CounterData data_type;
+  typedef const InstrProfWriter::CounterData &data_type_ref;
+
+  typedef uint64_t hash_value_type;
+  typedef uint64_t offset_type;
+
+  /// Hash \p K with the hash function the indexed format is written with.
+  static hash_value_type ComputeHash(key_type_ref K) {
+    return IndexedInstrProf::ComputeHash(IndexedInstrProf::HashType, K);
+  }
+
+  /// Write the key length and then the data length to \p Out, both in little
+  /// endian, and return the pair of lengths.
+  static std::pair<offset_type, offset_type>
+  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
+    // The payload is one 64-bit slot for the hash plus one per counter.
+    unsigned KeyLen = K.size();
+    unsigned DataLen = (1 + V.Counts.size()) * sizeof(uint64_t);
+
+    llvm::support::endian::Writer<llvm::support::little> LE(Out);
+    LE.write<offset_type>(KeyLen);
+    LE.write<offset_type>(DataLen);
+
+    return std::make_pair(KeyLen, DataLen);
+  }
+
+  /// Write the first \p N bytes of the key \p K to \p Out.
+  static void EmitKey(raw_ostream &Out, key_type_ref K, unsigned N) {
+    Out.write(K.data(), N);
+  }
+
+  /// Write the hash of \p V followed by each of its counters to \p Out as
+  /// little-endian 64-bit values.
+  static void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V,
+                       unsigned) {
+    llvm::support::endian::Writer<llvm::support::little> LE(Out);
+    LE.write<uint64_t>(V.Hash);
+    for (size_t Idx = 0, End = V.Counts.size(); Idx != End; ++Idx)
+      LE.write<uint64_t>(V.Counts[Idx]);
+  }
+};
+}
+
error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName,
uint64_t FunctionHash,
ArrayRef<uint64_t> Counters) {
return instrprof_error::success;
}
-void InstrProfWriter::write(raw_ostream &OS) {
- // Write out the counts for each function.
+/// Write every recorded function to \p OS in the indexed format.
+///
+/// The output is a header (magic, version, maximal function count, hash type
+/// and hash table offset) followed by the on-disk hash table. The offset is
+/// only known once the table has been emitted, so a placeholder is written
+/// first and patched afterwards by seeking back.
+void InstrProfWriter::write(raw_fd_ostream &OS) {
+ OnDiskChainedHashTableGenerator<InstrProfRecordTrait> Generator;
+ uint64_t MaxFunctionCount = 0;
+
+ // Populate the hash table generator.
for (const auto &I : FunctionData) {
- StringRef Name = I.getKey();
- uint64_t Hash = I.getValue().Hash;
- const std::vector<uint64_t> &Counts = I.getValue().Counts;
-
- OS << Name << "\n" << Hash << "\n" << Counts.size() << "\n";
- for (uint64_t Count : Counts)
- OS << Count << "\n";
- OS << "\n";
+ Generator.insert(I.getKey(), I.getValue());
+ // The first counter is used as the function count. Skip functions that
+ // have no counters instead of indexing past the end of an empty vector.
+ const std::vector<uint64_t> &FuncCounts = I.getValue().Counts;
+ if (!FuncCounts.empty() && FuncCounts[0] > MaxFunctionCount)
+ MaxFunctionCount = FuncCounts[0];
}
+
+ using namespace llvm::support;
+ endian::Writer<little> LE(OS);
+
+ // Write the header.
+ LE.write<uint64_t>(IndexedInstrProf::Magic);
+ LE.write<uint64_t>(IndexedInstrProf::Version);
+ LE.write<uint64_t>(MaxFunctionCount);
+ LE.write<uint64_t>(static_cast<uint64_t>(IndexedInstrProf::HashType));
+
+ // Save a space to write the hash table start location.
+ uint64_t HashTableStartLoc = OS.tell();
+ LE.write<uint64_t>(0);
+ // Write the hash table.
+ uint64_t HashTableStart = Generator.Emit(OS);
+
+ // Go back and fill in the hash table start.
+ OS.seek(HashTableStartLoc);
+ LE.write<uint64_t>(HashTableStart);
}
OVERFLOW: overflow.profdata: overflow: Counter overflow
RUN: not llvm-profdata show %p/Inputs/invalid-count-later.profdata 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
-RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.profdata %p/Inputs/invalid-count-later.profdata 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
+RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.profdata %p/Inputs/invalid-count-later.profdata -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
INVALID-COUNT-LATER: error: {{.*}}invalid-count-later.profdata: Malformed profile data
RUN: not llvm-profdata show %p/Inputs/bad-hash.profdata 2>&1 | FileCheck %s --check-prefix=BAD-HASH
-RUN: not llvm-profdata merge %p/Inputs/bad-hash.profdata %p/Inputs/bad-hash.profdata 2>&1 | FileCheck %s --check-prefix=BAD-HASH
+RUN: not llvm-profdata merge %p/Inputs/bad-hash.profdata %p/Inputs/bad-hash.profdata -o /dev/null 2>&1 | FileCheck %s --check-prefix=BAD-HASH
BAD-HASH: error: {{.*}}bad-hash.profdata: Malformed profile data
-RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo3-2.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO3
-RUN: llvm-profdata merge %p/Inputs/foo3-2.profdata %p/Inputs/foo3-1.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO3
+RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo3-2.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3
+RUN: llvm-profdata merge %p/Inputs/foo3-2.profdata %p/Inputs/foo3-1.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3
FOO3: foo:
FOO3: Counters: 3
FOO3: Function count: 8
FOO3: Maximum function count: 8
FOO3: Maximum internal block count: 7
-RUN: llvm-profdata merge %p/Inputs/foo4-1.profdata %p/Inputs/foo4-2.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO4
-RUN: llvm-profdata merge %p/Inputs/foo4-2.profdata %p/Inputs/foo4-1.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO4
+RUN: llvm-profdata merge %p/Inputs/foo4-1.profdata %p/Inputs/foo4-2.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO4
+RUN: llvm-profdata merge %p/Inputs/foo4-2.profdata %p/Inputs/foo4-1.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO4
FOO4: foo:
FOO4: Counters: 4
FOO4: Function count: 18
FOO4: Maximum function count: 18
FOO4: Maximum internal block count: 48
-RUN: llvm-profdata merge %p/Inputs/foo3bar3-1.profdata %p/Inputs/foo3bar3-2.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO3BAR3
-RUN: llvm-profdata merge %p/Inputs/foo3bar3-2.profdata %p/Inputs/foo3bar3-1.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO3BAR3
+RUN: llvm-profdata merge %p/Inputs/foo3bar3-1.profdata %p/Inputs/foo3bar3-2.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3BAR3
+RUN: llvm-profdata merge %p/Inputs/foo3bar3-2.profdata %p/Inputs/foo3bar3-1.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3BAR3
FOO3BAR3: foo:
FOO3BAR3: Counters: 3
FOO3BAR3: Function count: 19
FOO3BAR3: Maximum function count: 36
FOO3BAR3: Maximum internal block count: 50
-RUN: llvm-profdata merge %p/Inputs/empty.profdata %p/Inputs/foo3-1.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO3EMPTY
+RUN: llvm-profdata merge %p/Inputs/empty.profdata %p/Inputs/foo3-1.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3EMPTY
FOO3EMPTY: foo:
FOO3EMPTY: Counters: 3
FOO3EMPTY: Function count: 1
FOO3EMPTY: Maximum function count: 1
FOO3EMPTY: Maximum internal block count: 3
-RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo3bar3-1.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO3FOO3BAR3
+RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo3bar3-1.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3FOO3BAR3
FOO3FOO3BAR3: foo:
FOO3FOO3BAR3: Counters: 3
FOO3FOO3BAR3: Function count: 3
FOO3FOO3BAR3: Maximum function count: 7
FOO3FOO3BAR3: Maximum internal block count: 13
-RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/bar3-1.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=DISJOINT
+RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/bar3-1.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=DISJOINT
DISJOINT: foo:
DISJOINT: Counters: 3
DISJOINT: Function count: 1
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
cl::init("-"),
cl::desc("Output file"));
- cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
- cl::aliasopt(OutputFilename));
+ cl::alias OutputFilenameA("o", cl::desc("Alias for --output"), cl::Required,
+ cl::aliasopt(OutputFilename));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
- if (OutputFilename.empty())
- OutputFilename = "-";
+ if (OutputFilename.compare("-") == 0)
+ exitWithError("Cannot write indexed profdata format to stdout.");
std::string ErrorInfo;
- // FIXME: F_Text would be available if line_iterator could accept CRLF.
raw_fd_ostream Output(OutputFilename.data(), ErrorInfo, sys::fs::F_None);
if (!ErrorInfo.empty())
exitWithError(ErrorInfo, OutputFilename);