*/
struct BoundaryPoint {
// Sum of sample counts beginning at this point
- uint64_t BeginCount;
+ uint64_t BeginCount = UINT64_MAX; // UINT64_MAX: no range begins here (yet).
// Sum of sample counts ending at this point
- uint64_t EndCount;
-
- BoundaryPoint() : BeginCount(0), EndCount(0){};
-
- void addBeginCount(uint64_t Count) { BeginCount += Count; }
+ uint64_t EndCount = UINT64_MAX; // UINT64_MAX: no range ends here (yet).
+ // True if a range whose sample count is zero begins at this point.
+ bool IsZeroRangeBegin = false;
+ // True if a range whose sample count is zero ends at this point.
+ bool IsZeroRangeEnd = false;
+
+ void addBeginCount(uint64_t Count) {
+ if (BeginCount == UINT64_MAX) // First range beginning here: clear sentinel.
+ BeginCount = 0;
+ BeginCount += Count; // A zero Count still marks this as a begin point.
+ }
- void addEndCount(uint64_t Count) { EndCount += Count; }
+ void addEndCount(uint64_t Count) {
+ if (EndCount == UINT64_MAX) // First range ending here: clear sentinel.
+ EndCount = 0;
+ EndCount += Count; // A zero Count still marks this as an end point.
+ }
};
/*
[A, B-1]: 100
[B, B]: 300
[B+1, C]: 200.
+
+ Example for zero value range:
+
+ |<--- 100 --->|
+ |<--- 200 --->|
+ |<--------------- 0 ----------------->|
+ A B C D E F
+
+ [A, B-1] : 0
+ [B, C] : 100
+ [C+1, D-1]: 0
+ [D, E] : 200
+ [E+1, F] : 0
*/
std::map<uint64_t, BoundaryPoint> Boundaries;
for (auto Item : Ranges) {
- uint64_t Begin = Item.first.first;
- uint64_t End = Item.first.second;
- assert(Begin <= End && "Invalid instruction range");
+ assert(Item.first.first <= Item.first.second &&
+ "Invalid instruction range");
+ auto &BeginPoint = Boundaries[Item.first.first];
+ auto &EndPoint = Boundaries[Item.first.second];
uint64_t Count = Item.second;
- if (Boundaries.find(Begin) == Boundaries.end())
- Boundaries[Begin] = BoundaryPoint();
- Boundaries[Begin].addBeginCount(Count);
- if (Boundaries.find(End) == Boundaries.end())
- Boundaries[End] = BoundaryPoint();
- Boundaries[End].addEndCount(Count);
+ BeginPoint.addBeginCount(Count);
+ EndPoint.addEndCount(Count);
+ if (Count == 0) {
+ BeginPoint.IsZeroRangeBegin = true;
+ EndPoint.IsZeroRangeEnd = true;
+ }
}
+ // Use UINT64_MAX to indicate there is no existing range between BeginAddress
+ // and the next valid address
uint64_t BeginAddress = UINT64_MAX;
+ int ZeroRangeDepth = 0;
uint64_t Count = 0;
for (auto Item : Boundaries) {
uint64_t Address = Item.first;
BoundaryPoint &Point = Item.second;
- if (Point.BeginCount) {
+ if (Point.BeginCount != UINT64_MAX) {
if (BeginAddress != UINT64_MAX)
DisjointRanges[{BeginAddress, Address - 1}] = Count;
Count += Point.BeginCount;
BeginAddress = Address;
+ ZeroRangeDepth += Point.IsZeroRangeBegin;
}
- if (Point.EndCount) {
+ if (Point.EndCount != UINT64_MAX) {
assert((BeginAddress != UINT64_MAX) &&
"First boundary point cannot be 'end' point");
DisjointRanges[{BeginAddress, Address}] = Count;
assert(Count >= Point.EndCount && "Mismatched live ranges");
Count -= Point.EndCount;
BeginAddress = Address + 1;
+ ZeroRangeDepth -= Point.IsZeroRangeEnd;
+ // If the remaining count is zero and we are no longer inside a zero range,
+ // all preceding ranges have been consumed, so reset BeginAddress to
+ // UINT64_MAX. E.g., suppose we have two non-overlapping ranges:
+ // [<---- 10 ---->]
+ // [<---- 20 ---->]
+ // A B C D
+ // BeginAddress (B+1) is reset to the invalid value (UINT64_MAX), so no
+ // [B+1, C-1] zero range is emitted.
+ if (Count == 0 && ZeroRangeDepth == 0)
+ BeginAddress = UINT64_MAX;
}
}
}
ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt(
LeafLoc.Callsite.LineOffset, LeafLoc.Callsite.Discriminator);
uint64_t PreviousCount = R ? R.get() : 0;
- if (PreviousCount < Count) {
+ if (PreviousCount <= Count) {
FunctionProfile.addBodySamples(LeafLoc.Callsite.LineOffset,
LeafLoc.Callsite.Discriminator,
Count - PreviousCount);
return *FunctionProfile;
}
+RangeSample
+ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
+ RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
+ // For each sampled range, look up the offset range of the function that
+ // contains its begin offset and add that function range with a zero count,
+ // so offsets that receive no samples stay at zero. This is consistent with
+ // the compiler, which interprets a zero count as unexecuted (cold).
+ for (auto I : RangeCounter) {
+ uint64_t RangeBegin = I.first.first;
+ uint64_t RangeEnd = I.first.second;
+ // Find the function offset range that the current range's begin belongs to.
+ auto FuncRange = Binary->findFuncOffsetRange(RangeBegin);
+ if (FuncRange.second == 0) // {0, 0} means no function starts at/before it.
+ WithColor::warning()
+ << "[" << format("%8" PRIx64, RangeBegin) << " - "
+ << format("%8" PRIx64, RangeEnd)
+ << "]: Invalid range or disassembling error in profiled binary.\n";
+ else if (RangeEnd > FuncRange.second) // Range runs past the function's end.
+ WithColor::warning() << "[" << format("%8" PRIx64, RangeBegin) << " - "
+ << format("%8" PRIx64, RangeEnd)
+ << "]: Range is across different functions.\n";
+ else
+ Ranges[FuncRange] += 0; // Adds a zero entry if absent (map-style operator[]).
+ }
+ RangeSample DisjointRanges;
+ findDisjointRanges(DisjointRanges, Ranges); // Split into disjoint ranges.
+ return DisjointRanges;
+}
+
void ProfileGenerator::populateBodySamplesForAllFunctions(
const RangeSample &RangeCounter) {
- RangeSample Ranges;
- findDisjointRanges(Ranges, RangeCounter);
- for (auto Range : Ranges) {
+ for (auto Range : preprocessRangeCounter(RangeCounter)) {
uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
uint64_t Count = Range.second;
- // Disjoint ranges have introduce zero-filled gap that
- // doesn't belong to current context, filter them out.
- if (Count == 0)
- continue;
InstructionPointer IP(Binary, RangeBegin, true);
// Disjoint ranges may have range in the middle of two instr,
#include "llvm/Support/Path.h"
#include "llvm/Transforms/IPO/SampleContextTracker.h"
#include <list>
+#include <map>
#include <set>
#include <sstream>
#include <string>
PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){};
// Take the two addresses from the start of function as prolog
- void inferPrologOffsets(
- std::unordered_map<uint64_t, std::string> &FuncStartAddrMap) {
- for (auto I : FuncStartAddrMap) {
+ void inferPrologOffsets(std::map<uint64_t, std::pair<std::string, uint64_t>>
+ &FuncStartOffsetMap) {
+ for (auto I : FuncStartOffsetMap) {
PrologEpilogSet.insert(I.first);
InstructionPointer IP(Binary, I.first);
IP.advance();
ContextTrieNode RootContext;
};
+using OffsetRange = std::pair<uint64_t, uint64_t>;
+
class ProfiledBinary {
// Absolute path of the binary.
std::string Path;
// A list of text sections sorted by start RVA and size. Used to check
// if a given RVA is a valid code address.
std::set<std::pair<uint64_t, uint64_t>> TextSections;
- // Function offset to name mapping.
- std::unordered_map<uint64_t, std::string> FuncStartAddrMap;
+ // An ordered map from each function's start offset to its name and
+ // end offset.
+ std::map<uint64_t, std::pair<std::string, uint64_t>> FuncStartOffsetMap;
// Offset to context location map. Used to expand the context.
std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap;
// An array of offsets of all instructions sorted in increasing order. The
}
StringRef getFuncFromStartOffset(uint64_t Offset) {
- auto I = FuncStartAddrMap.find(Offset);
- if (I == FuncStartAddrMap.end())
+ auto I = FuncStartOffsetMap.find(Offset); // Exact match on a start offset.
+ if (I == FuncStartOffsetMap.end()) // Offset is not a known function start.
return StringRef();
- return I->second;
+ return I->second.first; // Mapped value is {name, end offset}; return name.
+ }
+
+ OffsetRange findFuncOffsetRange(uint64_t Offset) {
+ auto I = FuncStartOffsetMap.upper_bound(Offset); // First start > Offset.
+ if (I == FuncStartOffsetMap.begin()) // No function starts at/before Offset.
+ return {0, 0};
+ I--; // Step back to the last function starting at or before Offset.
+ return {I->first, I->second.second}; // {start, end}; Offset vs. end unchecked.
}
uint32_t getFuncSizeForContext(SampleContext &Context) {