From: wlei Date: Fri, 14 Oct 2022 03:42:51 +0000 (-0700) Subject: [llvm-profgen] Fix inconsistent loading address issues X-Git-Tag: upstream/17.0.6~30609 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=467652486f24b9f33eca7b4aaa6cb1e8ec6d18a7;p=platform%2Fupstream%2Fllvm.git [llvm-profgen] Fix inconsistent loading address issues This fixes two issues related to the loading address: 1) When multiple MMAP events occur and their loading addresses are different, previously only the first MMAP was used as the base address, so all perf addresses after it used the wrong base address. 2) For pseudo probe profiles, the address is always based on the preferred loading address. If the base address is not equal to the preferred loading address, the pseudo probe address query will be wrong. Solution: Instead of lazily converting addresses to offsets, all addresses are now converted on the fly at parsing time, based on the preferred loading address. The profile generator no longer uses any "offset". 
Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D126827 --- diff --git a/llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfscript b/llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfscript index 01c34a5..6eaf566 100644 --- a/llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfscript +++ b/llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfscript @@ -1,19 +1,19 @@ -PERF_RECORD_MMAP2 1243676/1243676: [0x201000(0x1000) @ 0 00:1d 224517108 1044165]: r-xp /home/noinline-cs-pseudoprobe.perfbin +PERF_RECORD_MMAP2 1243676/1243676: [0x301000(0x1000) @ 0 00:1d 224517108 1044165]: r-xp /home/noinline-cs-pseudoprobe.perfbin - 20179e - 2017f9 + 30179e + 3017f9 7f83e84e7793 5541f689495641d7 - 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 + 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 - 2017c4 - 2017f9 + 3017c4 + 3017f9 7f83e84e7793 5541f689495641d7 - 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 
0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 + 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 - 2017c4 - 2017f9 + 3017c4 + 3017f9 7f83e84e7793 5541f689495641d7 - 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 + 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 0x3017bf/0x301760/P/-/-/0 0x3017cf/0x30179e/P/-/-/0 0x30177f/0x3017c4/P/-/-/0 diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test index 97c96f0..e19d991 100644 --- a/llvm/test/tools/llvm-profgen/inline-noprobe2.test +++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test @@ -12,9 +12,10 @@ ; CHECK-EXT-ADDR-NEXT: 400870-400870:2 ; CHECK-EXT-ADDR-NEXT: 400875-4008bf:1 ; CHECK-EXT-ADDR-NEXT: 2 -; CHECK-EXT-ADDR-NEXT: 4008bf->400870:2 ; Value 1 is external address ; CHECK-EXT-ADDR-NEXT: 1->400875:1 +; CHECK-EXT-ADDR-NEXT: 4008bf->400870:2 + ; CHECK-SYM-LIST: Dump profile symbol list ; 
CHECK-SYM-LIST: main diff --git a/llvm/test/tools/llvm-profgen/invalid-range.test b/llvm/test/tools/llvm-profgen/invalid-range.test index 6d6f2cb..4ca43bc 100644 --- a/llvm/test/tools/llvm-profgen/invalid-range.test +++ b/llvm/test/tools/llvm-profgen/invalid-range.test @@ -31,13 +31,13 @@ ; CS-NEXT: 2017bf->201760:6 ; CS-NEXT: 2017cf->20179e:6 ; CS-NEXT: 2017d8->2017e3:1 -; CS-NEXT: [0x7f4] +; CS-NEXT: [0x2017f4] ; CS-NEXT: 1 ; CS-NEXT: 2017c4-2017cf:1 ; CS-NEXT: 2 ; CS-NEXT: 2017bf->201760:1 ; CS-NEXT: 2017cf->20179e:2 -; CS-NEXT: [0x7f4 @ 0x7bf] +; CS-NEXT: [0x2017f4 @ 0x2017bf] ; CS-NEXT: 1 ; CS-NEXT: 201760-20177f:1 ; CS-NEXT: 1 diff --git a/llvm/test/tools/llvm-profgen/pseudoprobe-decoding.test b/llvm/test/tools/llvm-profgen/pseudoprobe-decoding.test index 8212a7e..c05ef0a 100644 --- a/llvm/test/tools/llvm-profgen/pseudoprobe-decoding.test +++ b/llvm/test/tools/llvm-profgen/pseudoprobe-decoding.test @@ -17,68 +17,68 @@ PERF_RECORD_MMAP2 2854748/2854748: [0x400000(0x1000) @ 0 00:1d 123291722 526021] ; CHECK: [Probe]: FUNC: bar Index: 1 Type: Block ; CHECK-NEXT: [Probe]: FUNC: bar Index: 4 Type: Block -; CHECK-NEXT: 754: imull $2863311531, %edi, %eax +; CHECK-NEXT: 201754: imull $2863311531, %edi, %eax ; CHECK: : ; CHECK: [Probe]: FUNC: foo Index: 1 Type: Block ; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block -; CHECK-NEXT: 770: movl $1, %ecx +; CHECK-NEXT: 201770: movl $1, %ecx ; CHECK: [Probe]: FUNC: foo Index: 5 Type: Block -; CHECK-NEXT: 780: addl $30, %esi +; CHECK-NEXT: 201780: addl $30, %esi ; CHECK: [Probe]: FUNC: foo Index: 6 Type: Block ; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block -; CHECK-NEXT: 783: addl $1, %ecx +; CHECK-NEXT: 201783: addl $1, %ecx ; CHECK: [Probe]: FUNC: foo Index: 3 Type: Block -; CHECK-NEXT: 78e: movl %ecx, %edx +; CHECK-NEXT: 20178e: movl %ecx, %edx ; CHECK: [Probe]: FUNC: foo Index: 4 Type: Block ; CHECK-NEXT: [Probe]: FUNC: bar Index: 1 Type: Block Inlined: @ foo:8 ; CHECK-NEXT: [Probe]: FUNC: bar Index: 4 Type: 
Block Inlined: @ foo:8 -; CHECK-NEXT: 7bf: addl %ecx, %edx +; CHECK-NEXT: 2017bf: addl %ecx, %edx ; CHECK: [Probe]: FUNC: foo Index: 6 Type: Block ; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block -; CHECK-NEXT: 7cf: addl $1, %ecx +; CHECK-NEXT: 2017cf: addl $1, %ecx ; CHECK: [Probe]: FUNC: foo Index: 7 Type: Block -; CHECK-NEXT: 7de: movl $2098432, %edi +; CHECK-NEXT: 2017de: movl $2098432, %edi ; CHECK: [Probe]: FUNC: foo Index: 9 Type: DirectCall -; CHECK-NEXT: 7e5: callq 0x930 +; CHECK-NEXT: 2017e5: callq 0x201930 ; CHECK:
: ; CHECK: [Probe]: FUNC: main Index: 1 Type: Block ; CHECK-NEXT: [Probe]: FUNC: foo Index: 1 Type: Block Inlined: @ main:2 ; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block Inlined: @ main:2 -; CHECK-NEXT: 7f0: movl $1, %ecx +; CHECK-NEXT: 2017f0: movl $1, %ecx ; CHECK: [Probe]: FUNC: foo Index: 5 Type: Block Inlined: @ main:2 -; CHECK-NEXT: 800: addl $30, %esi +; CHECK-NEXT: 201800: addl $30, %esi ; CHECK: [Probe]: FUNC: foo Index: 6 Type: Block Inlined: @ main:2 ; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block Inlined: @ main:2 -; CHECK-NEXT: 803: addl $1, %ecx +; CHECK-NEXT: 201803: addl $1, %ecx ; CHECK: [Probe]: FUNC: foo Index: 3 Type: Block Inlined: @ main:2 -; CHECK-NEXT: 80e: movl %ecx, %edx +; CHECK-NEXT: 20180e: movl %ecx, %edx ; CHECK: [Probe]: FUNC: foo Index: 4 Type: Block Inlined: @ main:2 ; CHECK-NEXT: [Probe]: FUNC: bar Index: 1 Type: Block Inlined: @ main:2 @ foo:8 ; CHECK-NEXT: [Probe]: FUNC: bar Index: 4 Type: Block Inlined: @ main:2 @ foo:8 -; CHECK-NEXT: 83f: addl %ecx, %edx +; CHECK-NEXT: 20183f: addl %ecx, %edx ; CHECK: [Probe]: FUNC: foo Index: 6 Type: Block Inlined: @ main:2 ; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block Inlined: @ main:2 -; CHECK-NEXT: 84f: addl $1, %ecx +; CHECK-NEXT: 20184f: addl $1, %ecx ; CHECK: [Probe]: FUNC: foo Index: 7 Type: Block Inlined: @ main:2 -; CHECK-NEXT: 85e: movl $2098432, %edi +; CHECK-NEXT: 20185e: movl $2098432, %edi ; CHECK: [Probe]: FUNC: foo Index: 9 Type: DirectCall Inlined: @ main:2 -; CHECK-NEXT: 865: callq 0x930 +; CHECK-NEXT: 201865: callq 0x201930 ; SYM-NOT: : ; SYM: : diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index 3d83c68..66b4a0c 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -96,8 +96,7 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) { return; } - if (!isValidFallThroughRange(Binary->virtualAddrToOffset(Target), - 
Binary->virtualAddrToOffset(End), Binary)) { + if (!isValidFallThroughRange(Target, End, Binary)) { // Skip unwinding the rest of LBR trace when a bogus range is seen. State.setInvalid(); return; @@ -186,17 +185,11 @@ void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, return; auto Ret = CtxCounterMap->emplace(Hashable(Key), SampleCounter()); SampleCounter &SCounter = Ret.first->second; - for (auto &Item : Cur->RangeSamples) { - uint64_t StartOffset = Binary->virtualAddrToOffset(std::get<0>(Item)); - uint64_t EndOffset = Binary->virtualAddrToOffset(std::get<1>(Item)); - SCounter.recordRangeCount(StartOffset, EndOffset, std::get<2>(Item)); - } + for (auto &I : Cur->RangeSamples) + SCounter.recordRangeCount(std::get<0>(I), std::get<1>(I), std::get<2>(I)); - for (auto &Item : Cur->BranchSamples) { - uint64_t SourceOffset = Binary->virtualAddrToOffset(std::get<0>(Item)); - uint64_t TargetOffset = Binary->virtualAddrToOffset(std::get<1>(Item)); - SCounter.recordBranchCount(SourceOffset, TargetOffset, std::get<2>(Item)); - } + for (auto &I : Cur->BranchSamples) + SCounter.recordBranchCount(std::get<0>(I), std::get<1>(I), std::get<2>(I)); } template @@ -467,10 +460,16 @@ static std::string getContextKeyStr(ContextKey *K, for (uint32_t I = 0; I < CtxKey->Context.size(); I++) { if (OContextStr.str().size()) OContextStr << " @ "; + uint64_t Address = CtxKey->Context[I]; + if (UseOffset) { + if (UseLoadableSegmentAsBase) + Address -= Binary->getFirstLoadableAddress(); + else + Address -= Binary->getPreferredBaseAddress(); + } OContextStr << "0x" - << utohexstr( - Binary->virtualAddrToOffset(CtxKey->Context[I]), - /*LowerCase=*/true); + << utohexstr(Address, + /*LowerCase=*/true); } return OContextStr.str(); } else { @@ -570,6 +569,9 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, break; } + // Canonicalize to use preferred load address as base address. 
+ Src = Binary->canonicalizeVirtualAddress(Src); + Dst = Binary->canonicalizeVirtualAddress(Dst); bool SrcIsInternal = Binary->addressIsCode(Src); bool DstIsInternal = Binary->addressIsCode(Dst); if (!SrcIsInternal) @@ -605,6 +607,8 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt, return false; } TraceIt.advance(); + + FrameAddr = Binary->canonicalizeVirtualAddress(FrameAddr); // Currently intermixed frame from different binaries is not supported. if (!Binary->addressIsCode(FrameAddr)) { if (CallStack.empty()) @@ -737,14 +741,14 @@ void PerfScriptReader::writeUnsymbolizedProfile(raw_fd_ostream &OS) { uint64_t Start = I.first.first; uint64_t End = I.first.second; - if (!UseOffset || (UseOffset && UseLoadableSegmentAsBase)) { - Start = Binary->offsetToVirtualAddr(Start); - End = Binary->offsetToVirtualAddr(End); - } - - if (UseOffset && UseLoadableSegmentAsBase) { - Start -= Binary->getFirstLoadableAddress(); - End -= Binary->getFirstLoadableAddress(); + if (UseOffset) { + if (UseLoadableSegmentAsBase) { + Start -= Binary->getFirstLoadableAddress(); + End -= Binary->getFirstLoadableAddress(); + } else { + Start -= Binary->getPreferredBaseAddress(); + End -= Binary->getPreferredBaseAddress(); + } } OS.indent(Indent); @@ -816,13 +820,14 @@ void UnsymbolizedProfileReader::readSampleCounters(TraceStream &TraceIt, Range.second.getAsInteger(16, Target)) exitWithErrorForTraceLine(TraceIt); - if (!UseOffset || (UseOffset && UseLoadableSegmentAsBase)) { - uint64_t BaseAddr = 0; - if (UseOffset && UseLoadableSegmentAsBase) - BaseAddr = Binary->getFirstLoadableAddress(); - - Source = Binary->virtualAddrToOffset(Source + BaseAddr); - Target = Binary->virtualAddrToOffset(Target + BaseAddr); + if (UseOffset) { + if (UseLoadableSegmentAsBase) { + Source += Binary->getFirstLoadableAddress(); + Target += Binary->getFirstLoadableAddress(); + } else { + Source += Binary->getPreferredBaseAddress(); + Target += Binary->getPreferredBaseAddress(); + } } Counter[{Source, 
Target}] += Count; @@ -860,25 +865,26 @@ void UnsymbolizedProfileReader::parsePerfTraces() { void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat) { SampleCounter &Counter = SampleCounters.begin()->second; - uint64_t EndOffeset = 0; + uint64_t EndAddress = 0; for (const LBREntry &LBR : Sample->LBRStack) { - uint64_t SourceOffset = Binary->virtualAddrToOffset(LBR.Source); - uint64_t TargetOffset = Binary->virtualAddrToOffset(LBR.Target); + uint64_t SourceAddress = LBR.Source; + uint64_t TargetAddress = LBR.Target; - // Record the branch if its sourceOffset is external. It can be the case an + // Record the branch if its SourceAddress is external. It can be the case an // external source call an internal function, later this branch will be used // to generate the function's head sample. - if (Binary->offsetIsCode(TargetOffset)) { - Counter.recordBranchCount(SourceOffset, TargetOffset, Repeat); + if (Binary->addressIsCode(TargetAddress)) { + Counter.recordBranchCount(SourceAddress, TargetAddress, Repeat); } // If this not the first LBR, update the range count between TO of current // LBR and FROM of next LBR. 
- uint64_t StartOffset = TargetOffset; - if (Binary->offsetIsCode(StartOffset) && Binary->offsetIsCode(EndOffeset) && - isValidFallThroughRange(StartOffset, EndOffeset, Binary)) - Counter.recordRangeCount(StartOffset, EndOffeset, Repeat); - EndOffeset = SourceOffset; + uint64_t StartAddress = TargetAddress; + if (Binary->addressIsCode(StartAddress) && + Binary->addressIsCode(EndAddress) && + isValidFallThroughRange(StartAddress, EndAddress, Binary)) + Counter.recordRangeCount(StartAddress, EndAddress, Repeat); + EndAddress = SourceAddress; } } @@ -1088,13 +1094,13 @@ void PerfScriptReader::warnInvalidRange() { for (const auto &Item : AggregatedSamples) { const PerfSample *Sample = Item.first.getPtr(); uint64_t Count = Item.second; - uint64_t EndOffeset = 0; + uint64_t EndAddress = 0; for (const LBREntry &LBR : Sample->LBRStack) { - uint64_t SourceOffset = Binary->virtualAddrToOffset(LBR.Source); - uint64_t StartOffset = Binary->virtualAddrToOffset(LBR.Target); - if (EndOffeset != 0) - Ranges[{StartOffset, EndOffeset}] += Count; - EndOffeset = SourceOffset; + uint64_t SourceAddress = LBR.Source; + uint64_t StartAddress = LBR.Target; + if (EndAddress != 0) + Ranges[{StartAddress, EndAddress}] += Count; + EndAddress = SourceAddress; } } @@ -1103,17 +1109,14 @@ void PerfScriptReader::warnInvalidRange() { return; } - auto WarnInvalidRange = - [&](uint64_t StartOffset, uint64_t EndOffset, StringRef Msg) { - if (!ShowDetailedWarning) - return; - WithColor::warning() - << "[" - << format("%8" PRIx64, Binary->offsetToVirtualAddr(StartOffset)) - << "," - << format("%8" PRIx64, Binary->offsetToVirtualAddr(EndOffset)) - << "]: " << Msg << "\n"; - }; + auto WarnInvalidRange = [&](uint64_t StartAddress, uint64_t EndAddress, + StringRef Msg) { + if (!ShowDetailedWarning) + return; + WithColor::warning() << "[" << format("%8" PRIx64, StartAddress) << "," + << format("%8" PRIx64, EndAddress) << "]: " << Msg + << "\n"; + }; const char *EndNotBoundaryMsg = "Range is not on 
instruction boundary, " "likely due to profile and binary mismatch."; @@ -1131,31 +1134,37 @@ void PerfScriptReader::warnInvalidRange() { uint64_t BogusRange = 0; for (auto &I : Ranges) { - uint64_t StartOffset = I.first.first; - uint64_t EndOffset = I.first.second; + uint64_t StartAddress = I.first.first; + uint64_t EndAddress = I.first.second; TotalRangeNum += I.second; - if (!Binary->offsetIsCode(StartOffset) || - !Binary->offsetIsTransfer(EndOffset)) { + if (!Binary->addressIsCode(StartAddress) && + !Binary->addressIsCode(EndAddress)) + continue; + + if (!Binary->addressIsCode(StartAddress) || + !Binary->addressIsTransfer(EndAddress)) { InstNotBoundary += I.second; - WarnInvalidRange(StartOffset, EndOffset, EndNotBoundaryMsg); + WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg); } - auto *FRange = Binary->findFuncRangeForOffset(StartOffset); + auto *FRange = Binary->findFuncRange(StartAddress); if (!FRange) { UnmatchedRange += I.second; - WarnInvalidRange(StartOffset, EndOffset, DanglingRangeMsg); + WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg); continue; } - if (EndOffset >= FRange->EndOffset) { + if (EndAddress >= FRange->EndAddress) { RangeCrossFunc += I.second; - WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg); + WarnInvalidRange(StartAddress, EndAddress, RangeCrossFuncMsg); } - if (!isValidFallThroughRange(StartOffset, EndOffset, Binary)) { + if (Binary->addressIsCode(StartAddress) && + Binary->addressIsCode(EndAddress) && + !isValidFallThroughRange(StartAddress, EndAddress, Binary)) { BogusRange += I.second; - WarnInvalidRange(StartOffset, EndOffset, BogusRangeMsg); + WarnInvalidRange(StartAddress, EndAddress, BogusRangeMsg); } } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 6acbb14..b67fe1c 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -420,25 +420,24 @@ bool 
ProfileGeneratorBase::collectFunctionsFromRawProfile( // function. for (const auto &CI : *SampleCounters) { if (const auto *CtxKey = dyn_cast(CI.first.getPtr())) { - for (auto Addr : CtxKey->Context) { - if (FuncRange *FRange = Binary->findFuncRangeForOffset( - Binary->virtualAddrToOffset(Addr))) + for (auto StackAddr : CtxKey->Context) { + if (FuncRange *FRange = Binary->findFuncRange(StackAddr)) ProfiledFunctions.insert(FRange->Func); } } for (auto Item : CI.second.RangeCounter) { - uint64_t StartOffset = Item.first.first; - if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset)) + uint64_t StartAddress = Item.first.first; + if (FuncRange *FRange = Binary->findFuncRange(StartAddress)) ProfiledFunctions.insert(FRange->Func); } for (auto Item : CI.second.BranchCounter) { - uint64_t SourceOffset = Item.first.first; - uint64_t TargetOffset = Item.first.first; - if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset)) + uint64_t SourceAddress = Item.first.first; + uint64_t TargetAddress = Item.first.first; + if (FuncRange *FRange = Binary->findFuncRange(SourceAddress)) ProfiledFunctions.insert(FRange->Func); - if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset)) + if (FuncRange *FRange = Binary->findFuncRange(TargetAddress)) ProfiledFunctions.insert(FRange->Func); } } @@ -565,16 +564,15 @@ void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions( const BranchSample &BranchCounters) { for (const auto &Entry : BranchCounters) { - uint64_t SourceOffset = Entry.first.first; - uint64_t TargetOffset = Entry.first.second; + uint64_t SourceAddress = Entry.first.first; + uint64_t TargetAddress = Entry.first.second; uint64_t Count = Entry.second; assert(Count != 0 && "Unexpected zero weight branch"); - StringRef CalleeName = getCalleeNameForOffset(TargetOffset); + StringRef CalleeName = getCalleeNameForAddress(TargetAddress); if (CalleeName.size() == 0) 
continue; - uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress); if (CallProbe == nullptr) @@ -644,8 +642,8 @@ ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { // samples. This is to be consistent with compiler that interpret zero count // as unexecuted(cold). for (const auto &I : RangeCounter) { - uint64_t StartOffset = I.first.first; - for (const auto &Range : Binary->getRangesForOffset(StartOffset)) + uint64_t StartAddress = I.first.first; + for (const auto &Range : Binary->getRanges(StartAddress)) Ranges[{Range.first, Range.second - 1}] += 0; } } @@ -657,8 +655,8 @@ ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { void ProfileGenerator::populateBodySamplesForAllFunctions( const RangeSample &RangeCounter) { for (const auto &Range : preprocessRangeCounter(RangeCounter)) { - uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); - uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); + uint64_t RangeBegin = Range.first.first; + uint64_t RangeEnd = Range.first.second; uint64_t Count = Range.second; InstructionPointer IP(Binary, RangeBegin, true); @@ -669,16 +667,15 @@ void ProfileGenerator::populateBodySamplesForAllFunctions( continue; do { - uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); const SampleContextFrameVector &FrameVec = - Binary->getFrameLocationStack(Offset); + Binary->getFrameLocationStack(IP.Address); if (!FrameVec.empty()) { // FIXME: As accumulating total count per instruction caused some // regression, we changed to accumulate total count per byte as a // workaround. Tuning hotness threshold on the compiler side might be // necessary in the future. 
FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( - FrameVec, Count * Binary->getInstSize(Offset)); + FrameVec, Count * Binary->getInstSize(IP.Address)); updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), Count); } @@ -686,9 +683,10 @@ void ProfileGenerator::populateBodySamplesForAllFunctions( } } -StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { +StringRef +ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) { // Get the function range by branch target if it's a call branch. - auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset); + auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress); // We won't accumulate sample count for a range whose start is not the real // function entry such as outlined function or inner labels. @@ -701,17 +699,17 @@ StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { void ProfileGenerator::populateBoundarySamplesForAllFunctions( const BranchSample &BranchCounters) { for (const auto &Entry : BranchCounters) { - uint64_t SourceOffset = Entry.first.first; - uint64_t TargetOffset = Entry.first.second; + uint64_t SourceAddress = Entry.first.first; + uint64_t TargetAddress = Entry.first.second; uint64_t Count = Entry.second; assert(Count != 0 && "Unexpected zero weight branch"); - StringRef CalleeName = getCalleeNameForOffset(TargetOffset); + StringRef CalleeName = getCalleeNameForAddress(TargetAddress); if (CalleeName.size() == 0) continue; // Record called target sample and its count. 
const SampleContextFrameVector &FrameVec = - Binary->getFrameLocationStack(SourceOffset); + Binary->getFrameLocationStack(SourceAddress); if (!FrameVec.empty()) { FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, 0); @@ -842,8 +840,8 @@ void CSProfileGenerator::populateBodySamplesForFunction( RangeSample Ranges; findDisjointRanges(Ranges, RangeCounter); for (const auto &Range : Ranges) { - uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); - uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); + uint64_t RangeBegin = Range.first.first; + uint64_t RangeEnd = Range.first.second; uint64_t Count = Range.second; // Disjoint ranges have introduce zero-filled gap that // doesn't belong to current context, filter them out. @@ -858,8 +856,7 @@ void CSProfileGenerator::populateBodySamplesForFunction( continue; do { - uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); - auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); + auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address); if (LeafLoc) { // Recording body sample for this specific context updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); @@ -873,12 +870,12 @@ void CSProfileGenerator::populateBoundarySamplesForFunction( ContextTrieNode *Node, const BranchSample &BranchCounters) { for (const auto &Entry : BranchCounters) { - uint64_t SourceOffset = Entry.first.first; - uint64_t TargetOffset = Entry.first.second; + uint64_t SourceAddress = Entry.first.first; + uint64_t TargetAddress = Entry.first.second; uint64_t Count = Entry.second; assert(Count != 0 && "Unexpected zero weight branch"); - StringRef CalleeName = getCalleeNameForOffset(TargetOffset); + StringRef CalleeName = getCalleeNameForAddress(TargetAddress); if (CalleeName.size() == 0) continue; @@ -886,7 +883,7 @@ void CSProfileGenerator::populateBoundarySamplesForFunction( LineLocation CalleeCallSite(0, 0); if (CallerNode != &getRootContext()) { // Record called target 
sample and its count - auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); + auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress); if (LeafLoc) { CallerNode->getFunctionSamples()->addCalledTargetSamples( LeafLoc->Location.LineOffset, @@ -1054,8 +1051,8 @@ void ProfileGeneratorBase::extractProbesFromRange( } for (const auto &Range : *PRanges) { - uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); - uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); + uint64_t RangeBegin = Range.first.first; + uint64_t RangeEnd = Range.first.second; uint64_t Count = Range.second; InstructionPointer IP(Binary, RangeBegin, true); @@ -1078,13 +1075,13 @@ void ProfileGeneratorBase::extractProbesFromRange( } } -static void -extractPrefixContextStack(SampleContextFrameVector &ContextStack, - const SmallVectorImpl &Addresses, - ProfiledBinary *Binary) { +static void extractPrefixContextStack(SampleContextFrameVector &ContextStack, + const SmallVectorImpl &AddrVec, + ProfiledBinary *Binary) { SmallVector Probes; - for (auto Addr : reverse(Addresses)) { - const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(Addr); + for (auto Address : reverse(AddrVec)) { + const MCDecodedPseudoProbe *CallProbe = + Binary->getCallProbeForAddr(Address); // These could be the cases when a probe is not found at a calliste. Cutting // off the context from here since the inliner will not know how to consume // a context with unknown callsites. 
@@ -1187,10 +1184,9 @@ void CSProfileGenerator::populateBodySamplesWithProbes( void CSProfileGenerator::populateBoundarySamplesWithProbes( const BranchSample &BranchCounter, SampleContextFrames ContextStack) { for (const auto &BI : BranchCounter) { - uint64_t SourceOffset = BI.first.first; - uint64_t TargetOffset = BI.first.second; + uint64_t SourceAddress = BI.first.first; + uint64_t TargetAddress = BI.first.second; uint64_t Count = BI.second; - uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress); if (CallProbe == nullptr) @@ -1199,7 +1195,7 @@ void CSProfileGenerator::populateBoundarySamplesWithProbes( getFunctionProfileForLeafProbe(ContextStack, CallProbe); FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); FunctionProfile.addTotalSamples(Count); - StringRef CalleeName = getCalleeNameForOffset(TargetOffset); + StringRef CalleeName = getCalleeNameForAddress(TargetAddress); if (CalleeName.size() == 0) continue; FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h index 0ce4645..105b85e 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -108,7 +108,7 @@ protected: void updateCallsiteSamples(); - StringRef getCalleeNameForOffset(uint64_t TargetOffset); + StringRef getCalleeNameForAddress(uint64_t TargetAddress); void computeSummaryAndThreshold(SampleProfileMap &ProfileMap); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index eef5b8e..7648c8e 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -165,7 +165,7 @@ void ProfiledBinary::warnNoFuncEntry() { continue; bool hasFuncEntry = false; for (auto &R : F.second.Ranges) { - if (FuncRange *FR = 
findFuncRangeForStartOffset(R.first)) { + if (FuncRange *FR = findFuncRangeForStartAddr(R.first)) { if (FR->IsFuncEntry) { hasFuncEntry = true; break; @@ -224,8 +224,8 @@ void ProfiledBinary::load() { disassemble(Obj); // Use function start and return address to infer prolog and epilog - ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap); - ProEpilogTracker.inferEpilogOffsets(RetOffsets); + ProEpilogTracker.inferPrologAddresses(StartAddrToFuncRangeMap); + ProEpilogTracker.inferEpilogAddresses(RetAddressSet); warnNoFuncEntry(); @@ -233,10 +233,8 @@ void ProfiledBinary::load() { } bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) { - uint64_t Offset1 = virtualAddrToOffset(Address1); - uint64_t Offset2 = virtualAddrToOffset(Address2); - const SampleContextFrameVector &Context1 = getFrameLocationStack(Offset1); - const SampleContextFrameVector &Context2 = getFrameLocationStack(Offset2); + const SampleContextFrameVector &Context1 = getFrameLocationStack(Address1); + const SampleContextFrameVector &Context2 = getFrameLocationStack(Address2); if (Context1.size() != Context2.size()) return false; if (Context1.empty()) @@ -255,9 +253,8 @@ ProfiledBinary::getExpandedContext(const SmallVectorImpl &Stack, return ContextVec; // Process from frame root to leaf for (auto Address : Stack) { - uint64_t Offset = virtualAddrToOffset(Address); const SampleContextFrameVector &ExpandedContext = - getFrameLocationStack(Offset); + getFrameLocationStack(Address); // An instruction without a valid debug line will be ignored by sample // processing if (ExpandedContext.empty()) @@ -402,10 +399,10 @@ void ProfiledBinary::decodePseudoProbe() { decodePseudoProbe(Obj); } -void ProfiledBinary::setIsFuncEntry(uint64_t Offset, StringRef RangeSymName) { - // Note that the start offset of each ELF section can be a non-function +void ProfiledBinary::setIsFuncEntry(uint64_t Address, StringRef RangeSymName) { + // Note that the start address of each ELF section can 
be a non-function // symbol, we need to binary search for the start of a real function range. - auto *FuncRange = findFuncRangeForOffset(Offset); + auto *FuncRange = findFuncRange(Address); // Skip external function symbol. if (!FuncRange) return; @@ -421,13 +418,12 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, SectionSymbolsTy &Symbols, const SectionRef &Section) { std::size_t SE = Symbols.size(); - uint64_t SectionOffset = Section.getAddress() - getPreferredBaseAddress(); + uint64_t SectionAddress = Section.getAddress(); uint64_t SectSize = Section.getSize(); - uint64_t StartOffset = Symbols[SI].Addr - getPreferredBaseAddress(); - uint64_t NextStartOffset = - (SI + 1 < SE) ? Symbols[SI + 1].Addr - getPreferredBaseAddress() - : SectionOffset + SectSize; - setIsFuncEntry(StartOffset, + uint64_t StartAddress = Symbols[SI].Addr; + uint64_t NextStartAddress = + (SI + 1 < SE) ? Symbols[SI + 1].Addr : SectionAddress + SectSize; + setIsFuncEntry(StartAddress, FunctionSamples::getCanonicalFnName(Symbols[SI].Name)); StringRef SymbolName = @@ -446,36 +442,34 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, << format("%8" PRIx64, End) << "\n"; }; - uint64_t Offset = StartOffset; - // Size of a consecutive invalid instruction range starting from Offset -1 + uint64_t Address = StartAddress; + // Size of a consecutive invalid instruction range starting from Address -1 // backwards. uint64_t InvalidInstLength = 0; - while (Offset < NextStartOffset) { + while (Address < NextStartAddress) { MCInst Inst; uint64_t Size; // Disassemble an instruction. 
- bool Disassembled = - DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset), - Offset + getPreferredBaseAddress(), nulls()); + bool Disassembled = DisAsm->getInstruction( + Inst, Size, Bytes.slice(Address - SectionAddress), Address, nulls()); if (Size == 0) Size = 1; if (ShowDisassembly) { if (ShowPseudoProbe) { - ProbeDecoder.printProbeForAddress(outs(), - Offset + getPreferredBaseAddress()); + ProbeDecoder.printProbeForAddress(outs(), Address); } - outs() << format("%8" PRIx64 ":", Offset + getPreferredBaseAddress()); + outs() << format("%8" PRIx64 ":", Address); size_t Start = outs().tell(); if (Disassembled) - IPrinter->printInst(&Inst, Offset + Size, "", *STI.get(), outs()); + IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), outs()); else outs() << "\t"; if (ShowSourceLocations) { unsigned Cur = outs().tell() - Start; if (Cur < 40) outs().indent(40 - Cur); - InstructionPointer IP(this, Offset); + InstructionPointer IP(this, Address); outs() << getReversedLocWithContext( symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe)); } @@ -486,35 +480,35 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode()); // Record instruction size. - Offset2InstSizeMap[Offset] = Size; + AddressToInstSizeMap[Address] = Size; // Populate address maps. 
- CodeAddrOffsets.push_back(Offset); + CodeAddressVec.push_back(Address); if (MCDesc.isCall()) { - CallOffsets.insert(Offset); - UncondBranchOffsets.insert(Offset); + CallAddressSet.insert(Address); + UncondBranchAddrSet.insert(Address); } else if (MCDesc.isReturn()) { - RetOffsets.insert(Offset); - UncondBranchOffsets.insert(Offset); + RetAddressSet.insert(Address); + UncondBranchAddrSet.insert(Address); } else if (MCDesc.isBranch()) { if (MCDesc.isUnconditionalBranch()) - UncondBranchOffsets.insert(Offset); - BranchOffsets.insert(Offset); + UncondBranchAddrSet.insert(Address); + BranchAddressSet.insert(Address); } if (InvalidInstLength) { - WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1); + WarnInvalidInsts(Address - InvalidInstLength, Address - 1); InvalidInstLength = 0; } } else { InvalidInstLength += Size; } - Offset += Size; + Address += Size; } if (InvalidInstLength) - WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1); + WarnInvalidInsts(Address - InvalidInstLength, Address - 1); if (ShowDisassembly) outs() << "\n"; @@ -599,13 +593,13 @@ void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) { continue; uint64_t ImageLoadAddr = getPreferredBaseAddress(); - uint64_t SectionOffset = Section.getAddress() - ImageLoadAddr; + uint64_t SectionAddress = Section.getAddress() - ImageLoadAddr; uint64_t SectSize = Section.getSize(); if (!SectSize) continue; // Register the text section. 
- TextSections.insert({SectionOffset, SectSize}); + TextSections.insert({SectionAddress, SectSize}); StringRef SectionName = unwrapOrError(Section.getName(), FileName); @@ -685,30 +679,28 @@ void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { Func.FuncName = Ret.first->first; for (const auto &Range : Ranges) { - uint64_t FuncStart = Range.LowPC; - uint64_t FuncSize = Range.HighPC - FuncStart; + uint64_t StartAddress = Range.LowPC; + uint64_t EndAddress = Range.HighPC; - if (FuncSize == 0 || FuncStart < getPreferredBaseAddress()) + if (EndAddress <= StartAddress || + StartAddress < getPreferredBaseAddress()) continue; - uint64_t StartOffset = FuncStart - getPreferredBaseAddress(); - uint64_t EndOffset = Range.HighPC - getPreferredBaseAddress(); - // We may want to know all ranges for one function. Here group the // ranges and store them into BinaryFunction. - Func.Ranges.emplace_back(StartOffset, EndOffset); + Func.Ranges.emplace_back(StartAddress, EndAddress); - auto R = StartOffset2FuncRangeMap.emplace(StartOffset, FuncRange()); + auto R = StartAddrToFuncRangeMap.emplace(StartAddress, FuncRange()); if (R.second) { FuncRange &FRange = R.first->second; FRange.Func = &Func; - FRange.StartOffset = StartOffset; - FRange.EndOffset = EndOffset; + FRange.StartAddress = StartAddress; + FRange.EndAddress = EndAddress; } else { WithColor::warning() << "Duplicated symbol start address at " - << format("%8" PRIx64, StartOffset + getPreferredBaseAddress()) - << " " << R.first->second.getFuncName() << " and " << Name << "\n"; + << format("%8" PRIx64, StartAddress) << " " + << R.first->second.getFuncName() << " and " << Name << "\n"; } } } @@ -749,7 +741,7 @@ void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) { void ProfiledBinary::populateSymbolListFromDWARF( ProfileSymbolList &SymbolList) { - for (auto &I : StartOffset2FuncRangeMap) + for (auto &I : StartAddrToFuncRangeMap) SymbolList.add(I.second.getFuncName()); } @@ -770,7 +762,7 @@ 
SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP, bool UseProbeDiscriminator) { assert(this == IP.Binary && "Binary should only symbolize its own instruction"); - auto Addr = object::SectionedAddress{IP.Offset + getPreferredBaseAddress(), + auto Addr = object::SectionedAddress{IP.Address, object::SectionedAddress::UndefSection}; DIInliningInfo InlineStack = unwrapOrError( Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr), @@ -802,10 +794,8 @@ SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP, return CallStack; } -void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset, - uint64_t EndOffset) { - uint64_t RangeBegin = offsetToVirtualAddr(StartOffset); - uint64_t RangeEnd = offsetToVirtualAddr(EndOffset); +void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t RangeBegin, + uint64_t RangeEnd) { InstructionPointer IP(this, RangeBegin, true); if (IP.Address != RangeBegin) @@ -816,10 +806,9 @@ void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset, return; do { - uint64_t Offset = virtualAddrToOffset(IP.Address); const SampleContextFrameVector &SymbolizedCallStack = - getFrameLocationStack(Offset, UsePseudoProbes); - uint64_t Size = Offset2InstSizeMap[Offset]; + getFrameLocationStack(IP.Address, UsePseudoProbes); + uint64_t Size = AddressToInstSizeMap[IP.Address]; // Record instruction size for the corresponding context FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size); @@ -853,7 +842,7 @@ InstructionPointer::InstructionPointer(const ProfiledBinary *Binary, if (RoundToNext) { // we might get address which is not the code // it should round to the next valid address - if (Index >= Binary->getCodeOffsetsSize()) + if (Index >= Binary->getCodeAddrVecSize()) this->Address = UINT64_MAX; else this->Address = Binary->getAddressforIndex(Index); @@ -862,7 +851,7 @@ InstructionPointer::InstructionPointer(const ProfiledBinary *Binary, bool 
InstructionPointer::advance() { Index++; - if (Index >= Binary->getCodeOffsetsSize()) { + if (Index >= Binary->getCodeAddrVecSize()) { Address = UINT64_MAX; return false; } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index c099316..4bc58ef 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -56,12 +56,8 @@ class ProfiledBinary; struct InstructionPointer { const ProfiledBinary *Binary; - union { - // Offset of the executable segment of the binary. - uint64_t Offset = 0; - // Also used as address in unwinder - uint64_t Address; - }; + // Address of the executable segment of the binary. + uint64_t Address; // Index to the sorted code address array of the binary. uint64_t Index = 0; InstructionPointer(const ProfiledBinary *Binary, uint64_t Address, @@ -100,46 +96,47 @@ struct BinaryFunction { // Info about function range. A function can be split into multiple // non-continuous ranges, each range corresponds to one FuncRange. struct FuncRange { - uint64_t StartOffset; - // EndOffset is an exclusive bound. - uint64_t EndOffset; + uint64_t StartAddress; + // EndAddress is an exclusive bound. + uint64_t EndAddress; // Function the range belongs to BinaryFunction *Func; - // Whether the start offset is the real entry of the function. + // Whether the start address is the real entry of the function. bool IsFuncEntry = false; StringRef getFuncName() { return Func->FuncName; } }; -// PrologEpilog offset tracker, used to filter out broken stack samples +// PrologEpilog address tracker, used to filter out broken stack samples // Currently we use a heuristic size (two) to infer prolog and epilog // based on the start address and return address. In the future, // we will switch to Dwarf CFI based tracker struct PrologEpilogTracker { - // A set of prolog and epilog offsets. Used by virtual unwinding. + // A set of prolog and epilog addresses. Used by virtual unwinding. 
std::unordered_set PrologEpilogSet; ProfiledBinary *Binary; PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){}; // Take the two addresses from the start of function as prolog - void inferPrologOffsets(std::map &FuncStartOffsetMap) { - for (auto I : FuncStartOffsetMap) { + void + inferPrologAddresses(std::map &FuncStartAddressMap) { + for (auto I : FuncStartAddressMap) { PrologEpilogSet.insert(I.first); InstructionPointer IP(Binary, I.first); if (!IP.advance()) break; - PrologEpilogSet.insert(IP.Offset); + PrologEpilogSet.insert(IP.Address); } } // Take the last two addresses before the return address as epilog - void inferEpilogOffsets(std::unordered_set &RetAddrs) { + void inferEpilogAddresses(std::unordered_set &RetAddrs) { for (auto Addr : RetAddrs) { PrologEpilogSet.insert(Addr); InstructionPointer IP(Binary, Addr); if (!IP.backward()) break; - PrologEpilogSet.insert(IP.Offset); + PrologEpilogSet.insert(IP.Address); } } }; @@ -183,7 +180,7 @@ private: ContextTrieNode RootContext; }; -using OffsetRange = std::pair; +using AddressRange = std::pair; class ProfiledBinary { // Absolute path of the executable binary. @@ -221,28 +218,28 @@ class ProfiledBinary { // A list of binary functions that have samples. std::unordered_set ProfiledFunctions; - // An ordered map of mapping function's start offset to function range - // relevant info. Currently to determine if the offset of ELF is the start of + // An ordered map of mapping function's start address to function range + // relevant info. Currently to determine if the address of ELF is the start of // a real function, we leverage the function range info from DWARF. - std::map StartOffset2FuncRangeMap; + std::map StartAddrToFuncRangeMap; - // Offset to context location map. Used to expand the context. - std::unordered_map Offset2LocStackMap; + // Address to context location map. Used to expand the context. + std::unordered_map AddressToLocStackMap; - // Offset to instruction size map. 
Also used for quick offset lookup. - std::unordered_map Offset2InstSizeMap; + // Address to instruction size map. Also used for quick Address lookup. + std::unordered_map AddressToInstSizeMap; - // An array of offsets of all instructions sorted in increasing order. The + // An array of Addresses of all instructions sorted in increasing order. The // sorting is needed to fast advance to the next forward/backward instruction. - std::vector CodeAddrOffsets; - // A set of call instruction offsets. Used by virtual unwinding. - std::unordered_set CallOffsets; - // A set of return instruction offsets. Used by virtual unwinding. - std::unordered_set RetOffsets; - // An ordered set of unconditional branch instruction offsets. - std::set UncondBranchOffsets; - // A set of branch instruction offsets. - std::unordered_set BranchOffsets; + std::vector CodeAddressVec; + // A set of call instruction addresses. Used by virtual unwinding. + std::unordered_set CallAddressSet; + // A set of return instruction addresses. Used by virtual unwinding. + std::unordered_set RetAddressSet; + // An ordered set of unconditional branch instruction addresses. + std::set UncondBranchAddrSet; + // A set of branch instruction addresses. + std::unordered_set BranchAddressSet; // Estimate and track function prolog and epilog ranges. PrologEpilogTracker ProEpilogTracker; @@ -302,9 +299,9 @@ class ProfiledBinary { void loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit); // A function may be spilt into multiple non-continuous address ranges. We use - // this to set whether start offset of a function is the real entry of the + // this to set whether start address of a function is the real entry of the // function and also set false to the non-function label. - void setIsFuncEntry(uint64_t Offset, StringRef RangeSymName); + void setIsFuncEntry(uint64_t Address, StringRef RangeSymName); // Warn if no entry range exists in the function. 
void warnNoFuncEntry(); @@ -341,17 +338,15 @@ public: void decodePseudoProbe(); - uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const { - return VirtualAddress - BaseAddress; - } - uint64_t offsetToVirtualAddr(uint64_t Offset) const { - return Offset + BaseAddress; - } StringRef getPath() const { return Path; } StringRef getName() const { return llvm::sys::path::filename(Path); } uint64_t getBaseAddress() const { return BaseAddress; } void setBaseAddress(uint64_t Address) { BaseAddress = Address; } + // Canonicalize to use preferred load address as base address. + uint64_t canonicalizeVirtualAddress(uint64_t Address) { + return Address - BaseAddress + getPreferredBaseAddress(); + } // Return the preferred load address for the first executable segment. uint64_t getPreferredBaseAddress() const { return PreferredTextSegmentAddresses[0]; } // Return the preferred load address for the first loadable segment. @@ -365,64 +360,54 @@ public: return TextSegmentOffsets; } - uint64_t getInstSize(uint64_t Offset) const { - auto I = Offset2InstSizeMap.find(Offset); - if (I == Offset2InstSizeMap.end()) + uint64_t getInstSize(uint64_t Address) const { + auto I = AddressToInstSizeMap.find(Address); + if (I == AddressToInstSizeMap.end()) return 0; return I->second; } - bool offsetIsCode(uint64_t Offset) const { - return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end(); - } bool addressIsCode(uint64_t Address) const { - uint64_t Offset = virtualAddrToOffset(Address); - return offsetIsCode(Offset); + return AddressToInstSizeMap.find(Address) != AddressToInstSizeMap.end(); } + bool addressIsCall(uint64_t Address) const { - uint64_t Offset = virtualAddrToOffset(Address); - return CallOffsets.count(Offset); + return CallAddressSet.count(Address); } bool addressIsReturn(uint64_t Address) const { - uint64_t Offset = virtualAddrToOffset(Address); - return RetOffsets.count(Offset); + return RetAddressSet.count(Address); } bool addressInPrologEpilog(uint64_t Address) const 
{ - uint64_t Offset = virtualAddrToOffset(Address); - return ProEpilogTracker.PrologEpilogSet.count(Offset); + return ProEpilogTracker.PrologEpilogSet.count(Address); } - bool offsetIsTransfer(uint64_t Offset) { - return BranchOffsets.count(Offset) || RetOffsets.count(Offset) || - CallOffsets.count(Offset); + bool addressIsTransfer(uint64_t Address) { + return BranchAddressSet.count(Address) || RetAddressSet.count(Address) || + CallAddressSet.count(Address); } bool rangeCrossUncondBranch(uint64_t Start, uint64_t End) { if (Start >= End) return false; - auto R = UncondBranchOffsets.lower_bound(Start); - return R != UncondBranchOffsets.end() && *R < End; + auto R = UncondBranchAddrSet.lower_bound(Start); + return R != UncondBranchAddrSet.end() && *R < End; } uint64_t getAddressforIndex(uint64_t Index) const { - return offsetToVirtualAddr(CodeAddrOffsets[Index]); + return CodeAddressVec[Index]; } - size_t getCodeOffsetsSize() const { return CodeAddrOffsets.size(); } + size_t getCodeAddrVecSize() const { return CodeAddressVec.size(); } bool usePseudoProbes() const { return UsePseudoProbes; } bool useFSDiscriminator() const { return UseFSDiscriminator; } - // Get the index in CodeAddrOffsets for the address + // Get the index in CodeAddressVec for the address // As we might get an address which is not the code // here it would round to the next valid code address by // using lower bound operation - uint32_t getIndexForOffset(uint64_t Offset) const { - auto Low = llvm::lower_bound(CodeAddrOffsets, Offset); - return Low - CodeAddrOffsets.begin(); - } uint32_t getIndexForAddr(uint64_t Address) const { - uint64_t Offset = virtualAddrToOffset(Address); - return getIndexForOffset(Offset); + auto Low = llvm::lower_bound(CodeAddressVec, Address); + return Low - CodeAddressVec.begin(); } uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const { @@ -435,29 +420,29 @@ public: return 0; } - FuncRange *findFuncRangeForStartOffset(uint64_t Offset) { - auto I = 
StartOffset2FuncRangeMap.find(Offset); - if (I == StartOffset2FuncRangeMap.end()) + FuncRange *findFuncRangeForStartAddr(uint64_t Address) { + auto I = StartAddrToFuncRangeMap.find(Address); + if (I == StartAddrToFuncRangeMap.end()) return nullptr; return &I->second; } - // Binary search the function range which includes the input offset. - FuncRange *findFuncRangeForOffset(uint64_t Offset) { - auto I = StartOffset2FuncRangeMap.upper_bound(Offset); - if (I == StartOffset2FuncRangeMap.begin()) + // Binary search the function range which includes the input address. + FuncRange *findFuncRange(uint64_t Address) { + auto I = StartAddrToFuncRangeMap.upper_bound(Address); + if (I == StartAddrToFuncRangeMap.begin()) return nullptr; I--; - if (Offset >= I->second.EndOffset) + if (Address >= I->second.EndAddress) return nullptr; return &I->second; } // Get all ranges of one function. - RangesTy getRangesForOffset(uint64_t Offset) { - auto *FRange = findFuncRangeForOffset(Offset); + RangesTy getRanges(uint64_t Address) { + auto *FRange = findFuncRange(Address); // Ignore the range which falls into plt section or system lib. 
if (!FRange) return RangesTy(); @@ -493,17 +478,17 @@ public: void populateSymbolListFromDWARF(ProfileSymbolList &SymbolList); const SampleContextFrameVector & - getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) { - auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector()); + getFrameLocationStack(uint64_t Address, bool UseProbeDiscriminator = false) { + auto I = AddressToLocStackMap.emplace(Address, SampleContextFrameVector()); if (I.second) { - InstructionPointer IP(this, Offset); + InstructionPointer IP(this, Address); I.first->second = symbolize(IP, true, UseProbeDiscriminator); } return I.first->second; } - Optional getInlineLeafFrameLoc(uint64_t Offset) { - const auto &Stack = getFrameLocationStack(Offset); + Optional getInlineLeafFrameLoc(uint64_t Address) { + const auto &Stack = getFrameLocationStack(Address); if (Stack.empty()) return {}; return Stack.back(); @@ -515,15 +500,15 @@ public: bool inlineContextEqual(uint64_t Add1, uint64_t Add2); // Get the full context of the current stack with inline context filled in. - // It will search the disassembling info stored in Offset2LocStackMap. This is - // used as the key of function sample map + // It will search the disassembling info stored in AddressToLocStackMap. This + // is used as the key of function sample map SampleContextFrameVector getExpandedContext(const SmallVectorImpl &Stack, bool &WasLeafInlined); // Go through instructions among the given range and record its size for the // inline context. - void computeInlinedContextSizeForRange(uint64_t StartOffset, - uint64_t EndOffset); + void computeInlinedContextSizeForRange(uint64_t StartAddress, + uint64_t EndAddress); void computeInlinedContextSizeForFunc(const BinaryFunction *Func);