From b3154d11bc6dee59e581b731b7561f1ebab3aed6 Mon Sep 17 00:00:00 2001
From: wlei
Date: Mon, 23 Nov 2020 20:33:23 -0800
Subject: [PATCH] [CSSPGO][llvm-profgen] Pseudo probe decoding and disassembling
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This change implements pseudo probe decoding and disassembling for llvm-profgen/CSSPGO. Please see https://groups.google.com/g/llvm-dev/c/1p1rdYbL93s and https://reviews.llvm.org/D89707 for more context about CSSPGO and llvm-profgen.

**ELF section format**
Please see the encoding patch (https://reviews.llvm.org/D91878) for more details of the format; the example is copied here:

Two sections (`.pseudo_probe_desc` and `.pseudo_probe`) are emitted in ELF to support pseudo probes.
The format of the `.pseudo_probe_desc` section looks like:

```
.section .pseudo_probe_desc,"",@progbits
.quad 6309742469962978389 // Func GUID
.quad 4294967295 // Func Hash
.byte 9 // Length of func name
.ascii "_Z5funcAi" // Func name
.quad 7102633082150537521
.quad 138828622701
.byte 12
.ascii "_Z8funcLeafi"
.quad 446061515086924981
.quad 4294967295
.byte 9
.ascii "_Z5funcBi"
.quad -2016976694713209516
.quad 72617220756
.byte 7
.ascii "_Z3fibi"
```

For each `.pseudo_probe` section, the encoded binary data consists of a single function record corresponding to an outlined function (i.e., a function with a code entry in the `.text` section). A function record has the following format:

```
FUNCTION BODY (one for each outlined function present in the text section)
    GUID (uint64)
        GUID of the function
    NPROBES (ULEB128)
        Number of probes originating from this function.
    NUM_INLINED_FUNCTIONS (ULEB128)
        Number of callees inlined into this function, aka number of
        first-level inlinees
    PROBE RECORDS
        A list of NPROBES entries.
Each entry contains:
          INDEX (ULEB128)
          TYPE (uint4)
            0 - block probe, 1 - indirect call, 2 - direct call
          ATTRIBUTE (uint3)
            1 - tail call, 2 - dangling
          ADDRESS_TYPE (uint1)
            0 - code address, 1 - address delta
          CODE_ADDRESS (uint64 or ULEB128)
            code address or address delta, depending on ADDRESS_TYPE
    INLINED FUNCTION RECORDS
        A list of NUM_INLINED_FUNCTIONS entries describing each of the
        inlined callees. Each record contains:
          INLINE SITE
            GUID of the inlinee (uint64)
            ID of the callsite probe (ULEB128)
          FUNCTION BODY
            A FUNCTION BODY entry describing the inlined function.
```

**Disassembling**
A switch `--show-pseudo-probe` is added for use along with `--show-disassembly` to print the disassembled code together with pseudo probe directives.

For example:
```
00000000002011a0 :
  2011a0: 50 push rax
  2011a1: 85 ff test edi,edi
  [Probe]: FUNC: foo2 Index: 1 Type: Block
  2011a3: 74 02 je 2011a7
  [Probe]: FUNC: foo2 Index: 3 Type: Block
  [Probe]: FUNC: foo2 Index: 4 Type: Block
  [Probe]: FUNC: foo Index: 1 Type: Block Inlined: @ foo2:6
  2011a5: 58 pop rax
  2011a6: c3 ret
  [Probe]: FUNC: foo2 Index: 2 Type: Block
  2011a7: bf 01 00 00 00 mov edi,0x1
  [Probe]: FUNC: foo2 Index: 5 Type: IndirectCall
  2011ac: ff d6 call rsi
  [Probe]: FUNC: foo2 Index: 4 Type: Block
  2011ae: 58 pop rax
  2011af: c3 ret
```

**Implementation**
- `PseudoProbeDecoder` is added in ProfiledBinary as the infrastructure for decoding. It decodes the two sections and generates two maps: `GUIDProbeFunctionMap` stores all the `PseudoProbeFuncDesc` entries, each of which describes one function; `AddressProbesMap` stores all the pseudo probes, indexed by their address.
- All the inline info is encoded into the binary as a trie (`PseudoProbeInlineTree`), which is rebuilt during decoding. Each pseudo probe can get its inline context (`getInlineContext`) by traversing its inline tree node backwards.

Test Plan: ninja & ninja check-llvm Differential Revision: https://reviews.llvm.org/D92334 --- .../Inputs/inline-cs-pseudoprobe.perfbin | Bin 0 -> 13432 bytes .../tools/llvm-profgen/pseudoprobe-decoding.test | 121 +++++++++ llvm/tools/llvm-profgen/CMakeLists.txt | 1 + llvm/tools/llvm-profgen/ProfiledBinary.cpp | 33 +++ llvm/tools/llvm-profgen/ProfiledBinary.h | 7 + llvm/tools/llvm-profgen/PseudoProbe.cpp | 297 +++++++++++++++++++++ llvm/tools/llvm-profgen/PseudoProbe.h | 209 +++++++++++++++ 7 files changed, 668 insertions(+) create mode 100755 llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin create mode 100644 llvm/test/tools/llvm-profgen/pseudoprobe-decoding.test create mode 100644 llvm/tools/llvm-profgen/PseudoProbe.cpp create mode 100644 llvm/tools/llvm-profgen/PseudoProbe.h diff --git a/llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin b/llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin new file mode 100755 index 0000000000000000000000000000000000000000..2b5fc0a9dfdd1df5400e6202f552b30e7e22af47 GIT binary patch literal 13432 zcmeHNdvF`adEWzYdY>nwhYdUcp zMX|+x-`@8DhXc-*oYv_dUGnbR{e7?9Z};~07W>Km=zvF21SgNURbc7J^_qlw%d21&Vr)68}+;COJ%hOO}LS$~8oa zdZnaM)}r@dg&;8%jaC`;RJe{1pWDau0@Z`5T@O`y>Mc7xEtePckS!t)G zCx|u^TnsDK6RUy#hRNSB`D1#5{4wQtj$AYovLD{5p45IaBZf(DIR*#Pa#bs%eX=A3 zQ}lu9BcMm7EZ{2dsE^lKiuYOSccvQIVaop6(y6ho&bD--HJ!>7rdy}CceQqP2J_iq zhqQ;`!!`}xv0qxi@kK6V+3kiptEwVh8xyqbNiog)7%s)HO20a3Ui z1-%9ikf7^JXk(>XWflM(FzZWHv*H zF7%<%J$k~(8RMzEY2-%t>`rGh#%OFTZCJdjoT*$YV)EQj~a|qA}fSN%2pc5YingIGZ(C2}E56du6UIF?~KszDeYaowFxqn0`(~ZiF z%WLcKSw$S@vPJ0RHWcWqmEDbaCtP*l$6imAjl%T=;2Ka74m5tovwKC&-Cz)k$WOzC z{NDvCJV!}jj}$=ugK#xMJwH*#-z)ijfISY^6z~n?4+I*&;OP&n`@FY5pw0RE1I-Vr zkwE)rY9fK{$7=@zJ(AqxM7x|pp1+tM(pEm24E;Ov4K|k|EK|Unqw{Aw#awT%+iZ?R57&)_WTSPe*dHRaE z1`NCf-Gv6~mM$b4*7ue4(f`b@6`(5$tM^A{FD~4_w6t_i{nahdr-$cHfFF=f1{>Bt zfNo{`UjF@3eNiR#cYkv7{ht$yf4|Bfm!}EU$Z583aK!KN6Hmn!N z&)t0djPj{pDjQ!HW_0$+Na}LxQ`b*K&b+G4o^QGI^n!Bf%tFo4(|32DF@G@B@Mlj& 
z&b+i?&NuQ`Xs6{n%9FE;zj)^OWu>9-oN|8lT+vqH>sXUhLe)g*^uhp0ABL*;2%G;G z17PC4@qZ3Hg-#!aj>Y(f?;M)_=kS4{**C)b!?SONVMJ{DHardcBXg}t_eEPSMvlKy zw=f85kuz_3&Gn&Ysehxhm!q?9^v%8+URv|7k>m49r2F3rFGY@@t-Jeh_@40L@JGY? z{E71ZnFllI7i~h{zXp#C_PuB6Db$-U@ni{vZE0!iOB-NxZh8go7{8@WSe`(M7caob zXk4pF=^mmGXqj+6$8?=Vig%n_Firb3N%{SKM3#i0*SAv7@bRJY@#&OHBonvA8jo%^nTLvy*L!m>EM(&X~&P z+p_sd1IorUyhSsybXz)o-(>4lE_<&LH??hBV_TDAP5!QH!eXw9(BYSFd_w**64M|-F{6zT|R z%_Bxai^NO|Z-x2_f#XTO=%9lJ!yAN?D98So zR`|_drQd6mV}DF5{2Ev3_ZsEcA5(me%lbDKDt?b4D*;))PKsVKl7Qc2Y)hI z<4MgGGMN-Snd9-eV>k-ol#OLF*|Ainsx101+1)(2_fE079{`>|aWOp@-t0k{1}O+P ziG2W;&+(%=Oc41AA@I9FCQCk^)Zr;N;VFk{11CM6-^kv;-jUIvfkR>mb*{HG*{)nQ zA3Ugs_wDNksl9jX1HPj^wK)It9B$d%fZ=x9%1)+CJ(-J58v0ZgR_8_zWO(kSy41iD zPOJnJ-j7z2ZP;M7FK5KksqqXl-1_Q-1 zyF6zzttP|o^D8tjSCip){BnB7R+D*z#?%0$i2|53~n|T5i z-|Kn8W$cx;;qAuWud82eJn?&r>ARKTaqWGZvCHFnCCP02#!XEhF06k34}UzefbUH2 zSS2%=Zs-fxQ5zJiLV@aBmBfB1S6hk666Y{RzsxMHDK^S=kzZ z6*0`&8(?0-1X5O}bpIM<|Ar{Iuhgn=udGxy1y-xxK;zmQ11ptP@WS*20bEtnv+j$% zO(A$edsX3ChZ$c`@dkGIclzJoG_?A*CJjE0R8?5e5b$d@Lg7}wAMn~v;aQ71bvG%Y z)oAXZ|B(Mq|2KQVK|R~CY%J@v+3?$(v*p{Gph>qw%dM}eIt9OqmJ9qI$Mi>V4byyg zGe~*3paDZU5QV!}*?&-y(Kry{dk$`j7r%rQ^*@Sv742KFsKJ?2^?9bqFm4(t2j<}} zKNY`=nIX9S2(CJGaf$@7R?3R&28HJLDkFBU*2k){Oj(9ck>W@9%W#2WWrcr{OyCBT za$qgqSE3Mnb3>vT`(A`sdDnvd+Q%R-Yw}ab6l-!DNxn@@U`r;O6~|!9G%sLz0~c%) z8^O4cyPE$uKHl0UvXe&J@1%`X+nv$Y(OCYd{0xNkVRGEaw3$ZUY@Ny*g+vy=495%$ zNf_~L0#>+UTEKK6uycJvgS(b|Ph2jLmW2LY(t2uhFvr4OlM6xiP7 zL6V2D^!=HcLp2ks@-0x$w_s+%<+I{Teh-@%Y6hUpc-J zuE4yq=dv;1(vF*KLd+a=}05g3r0&CtdLGy5KLk;I;CV9uUrYYA*OL!kz8C zhwvZLyN}y@%mx3Fi+#6o^$n0$U^ik|)!4M8ACvu6Io?*`K=rJv7>D1NcxAu;gmCA+ z>dz#;MmTW;aF3^*Kf06@l(P%&IaKg zP((ZSr3$iY0oQOBg`Nu%7tMgLgdg=6eVT;8tW4Xa9V*z!5a5kf>>L1Gv&nJx<1YA@ zUGUR@WBrHexn`SZ3Fp@*F845|oXB8)K_bX`KQO5*4XQX3jfO1o56LMMH7#Bffg0`b35+KPX5j0_oTtHn? 
za8CjeL8*pWJ(kPGj#*63gZ0>CDh_qXf`Vn*oP^Cd_?cQa<9= zgkuK$)}DvPB#g1bxSq}yv$3(UoN*sBQki6y^D@vR4=dhsb0P?R2T=ez0fJV}q9RDU zp*qGI7RrvYZe$Wp%!*FA9?Dh2gQa*?QE+1hi(#yy62SYH3Qz^r$O38_v7>=WO&di&IWD>%z>u`ufj}Gk{)pb!iS)%hpn-_ef#0Kel zBCC(5vtzNeo-nhyydEn|3wV;J(uQdyg4;T}JHc`lA-yzlK-0|JF=1_jCkm63$3Vqa zk`ID4uaIc*bE+6gBb}^xUP>4SuN06HUOsM0xD2+Oak!^Syei|R#FUDaW2GceIZjGE zsa!EeN_@xoC^4j>V{BLlh~oKz6&7i>6aqS5RAiUM$Hv4?su)it(~^z4x;`)x-qWx5 z-_Zx7$6-LYj6h+XX0nUplxOV>;2VSagVDXc;i$fMU|?VWs6HC*jrQXSsJ+044{*JZ zH?RhkC#LK+9LJ}mBRK{^69+!k{9PTZf&re->Jza{0#0AyUI1=+&{-?pK6l|^Z7qM7pMWxo_L+b z7;YxPwp>x4&o?ZP!4o7XccvhZ6w^A3$lnI>>;U`c^99p#U0?cbjEY4+EdN2UgJ%)A z%;ymvAO(&5uzzR$cLM(nn?9efc$D-xe(ay?Pi93wWMC;*tQVihI79ZW_W8SGh z4j7KV4!AJ!dVG=e`5cPHE$MSvZk0lT>t8zeYDx65AW`-tN#TQ-lpN1_LOpjTlcz5dmv5WrwxG;dU z*P-Ajia#fP=lJ`3eAz&H!6L;w>off}=@$hk^F))Cx#(c99>eEd^dGvxl6Zmzi8}Rv z1c2ieAtfU;@t>ymNDH7l;Ns_(nkW7p^!eE(7n0Py&nhiC96E|(!A1YlEtdY9^d4l} HPW}G|C?3-@ literal 0 HcmV?d00001 diff --git a/llvm/test/tools/llvm-profgen/pseudoprobe-decoding.test b/llvm/test/tools/llvm-profgen/pseudoprobe-decoding.test new file mode 100644 index 0000000..5feaa97 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/pseudoprobe-decoding.test @@ -0,0 +1,121 @@ +; RUN: llvm-profgen --perfscript=%s --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-pseudo-probe --show-disassembly | FileCheck %s + +PERF_RECORD_MMAP2 2854748/2854748: [0x400000(0x1000) @ 0 00:1d 123291722 526021]: r-xp /home/inline-cs-pseudoprobe.perfbin + +; CHECK: Pseudo Probe Desc: +; CHECK: GUID: 6699318081062747564 Name: foo +; CHECK: Hash: 138950591924 +; CHECK: GUID: 15822663052811949562 Name: main +; CHECK: Hash: 4294967295 +; CHECK: GUID: 16434608426314478903 Name: bar +; CHECK: Hash: 72617220756 + + + +; CHECK: : + +; CHECK: [Probe]: FUNC: bar Index: 1 Type: Block +; CHECK-NEXT: 754: imull $2863311531, %edi, %eax + +; CHECK: [Probe]: FUNC: bar Index: 2 Type: Block Dangling +; CHECK-NEXT: [Probe]: FUNC: bar Index: 3 Type: Block Dangling +; CHECK-NEXT: 
768: cmovbl %esi, %ecx + +; CHECK: [Probe]: FUNC: bar Index: 4 Type: Block +; CHECK-NEXT: 76e: popq %rbp + + +; CHECK: : +; CHECK: [Probe]: FUNC: foo Index: 1 Type: Block +; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block +; CHECK-NEXT: 770: movl $1, %ecx + +; CHECK: [Probe]: FUNC: foo Index: 5 Type: Block +; CHECK-NEXT: 780: addl $30, %esi +; CHECK: [Probe]: FUNC: foo Index: 6 Type: Block +; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block +; CHECK-NEXT: 783: addl $1, %ecx + +; CHECK: [Probe]: FUNC: foo Index: 3 Type: Block +; CHECK-NEXT: 7a9: cmpl %eax, %ecx + +; CHECK: [Probe]: FUNC: foo Index: 4 Type: Block +; CHECK-NEXT: [Probe]: FUNC: bar Index: 1 Type: Block Inlined: @ foo:8 +; CHECK-NEXT: 7bf: addl %ecx, %edx + +; CHECK: [Probe]: FUNC: bar Index: 2 Type: Block Dangling Inlined: @ foo:8 +; CHECK-NEXT: [Probe]: FUNC: bar Index: 3 Type: Block Dangling Inlined: @ foo:8 +; CHECK-NEXT: 7c8: cmovel %esi, %eax + +; CHECK: [Probe]: FUNC: bar Index: 4 Type: Block Inlined: @ foo:8 +; CHECK-NEXT: 7cd: movl %eax, %esi +; CHECK: [Probe]: FUNC: foo Index: 6 Type: Block +; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block + +; CHECK: [Probe]: FUNC: foo Index: 7 Type: Block +; CHECK-NEXT: 7de: movl $2098432, %edi + +; CHECK: [Probe]: FUNC: foo Index: 9 Type: DirectCall +; CHECK-NEXT: 7e5: callq 0x930 + + +; CHECK:
: +; CHECK: [Probe]: FUNC: main Index: 1 Type: Block +; CHECK-NEXT: [Probe]: FUNC: foo Index: 1 Type: Block Inlined: @ main:2 +; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block Inlined: @ main:2 +; CHECK-NEXT: 7f0: movl $1, %ecx + +; CHECK: [Probe]: FUNC: foo Index: 5 Type: Block Inlined: @ main:2 +; CHECK-NEXT: 800: addl $30, %esi +; CHECK: [Probe]: FUNC: foo Index: 6 Type: Block Inlined: @ main:2 +; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block Inlined: @ main:2 +; CHECK-NEXT: 803: addl $1, %ecx + +; CHECK: [Probe]: FUNC: foo Index: 3 Type: Block Inlined: @ main:2 +; CHECK-NEXT: 829: cmpl %eax, %ecx + +; CHECK: [Probe]: FUNC: foo Index: 4 Type: Block Inlined: @ main:2 +; CHECK-NEXT: [Probe]: FUNC: bar Index: 1 Type: Block Inlined: @ main:2 @ foo:8 +; CHECK-NEXT: 83f: addl %ecx, %edx + +; CHECK: [Probe]: FUNC: bar Index: 2 Type: Block Dangling Inlined: @ main:2 @ foo:8 +; CHECK-NEXT: [Probe]: FUNC: bar Index: 3 Type: Block Dangling Inlined: @ main:2 @ foo:8 +; CHECK-NEXT: 848: cmovel %esi, %eax + +; CHECK: [Probe]: FUNC: bar Index: 4 Type: Block Inlined: @ main:2 @ foo:8 +; CHECK-NEXT: 84d: movl %eax, %esi +; CHECK: [Probe]: FUNC: foo Index: 6 Type: Block Inlined: @ main:2 +; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block Inlined: @ main:2 +; CHECK-NEXT: 84f: addl $1, %ecx + +; CHECK: [Probe]: FUNC: foo Index: 7 Type: Block Inlined: @ main:2 +; CHECK-NEXT: 85e: movl $2098432, %edi + +; CHECK: [Probe]: FUNC: foo Index: 9 Type: DirectCall Inlined: @ main:2 +; CHECK-NEXT: 865: callq 0x930 + + +; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling +; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls +; -g test.c -o a.out + +#include + +int bar(int x, int y) { + if (x % 3) { + return x - y; + } + return x + y; +} + +void foo() { + int s, i = 0; + while (i++ < 4000 * 4000) + if (i % 91) s = bar(i, s); else s += 30; + printf("sum is %d\n", s); +} + +int main() { + foo(); + return 0; +} diff 
--git a/llvm/tools/llvm-profgen/CMakeLists.txt b/llvm/tools/llvm-profgen/CMakeLists.txt
index 4379a8b..e7705eb 100644
--- a/llvm/tools/llvm-profgen/CMakeLists.txt
+++ b/llvm/tools/llvm-profgen/CMakeLists.txt
@@ -17,4 +17,5 @@ add_llvm_tool(llvm-profgen
   PerfReader.cpp
   ProfiledBinary.cpp
   ProfileGenerator.cpp
+  PseudoProbe.cpp
   )
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 37b71fd..96080e9 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -29,6 +29,10 @@ static cl::opt ShowSourceLocations("show-source-locations",
                                          cl::ZeroOrMore,
                                          cl::desc("Print source locations."));
 
+static cl::opt ShowPseudoProbe(
+    "show-pseudo-probe", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore,
+    cl::desc("Print pseudo probe section and disassembled info."));
+
 namespace llvm {
 namespace sampleprof {
 
@@ -93,6 +97,9 @@ void ProfiledBinary::load() {
   // Find the preferred base address for text sections.
   setPreferredBaseAddress(Obj);
 
+  // Decode the .pseudo_probe_desc and .pseudo_probe sections, if present.
+  decodePseudoProbe(Obj);
+
   // Disassemble the text sections.
   disassemble(Obj);
 
@@ -165,6 +172,28 @@ void ProfiledBinary::setPreferredBaseAddress(const ELFObjectFileBase *Obj) {
     exitWithError("no text section found", Obj->getFileName());
 }
 
+void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
+  StringRef FileName = Obj->getFileName();
+  for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
+       SI != SE; ++SI) {
+    const SectionRef &Section = *SI;
+    StringRef SectionName = unwrapOrError(Section.getName(), FileName);
+
+    if (SectionName == ".pseudo_probe_desc") { // function descriptors
+      StringRef Contents = unwrapOrError(Section.getContents(), FileName);
+      ProbeDecoder.buildGUID2FuncDescMap(
+          reinterpret_cast(Contents.data()), Contents.size());
+    } else if (SectionName == ".pseudo_probe") { // probes and inline trees
+      StringRef Contents = unwrapOrError(Section.getContents(), FileName);
+      ProbeDecoder.buildAddress2ProbeMap(
+          reinterpret_cast(Contents.data()), Contents.size());
+    }
+  }
+
+  if (ShowPseudoProbe) // --show-pseudo-probe: dump the decoded descriptors
+    ProbeDecoder.printGUID2FuncDescMap(outs());
+}
+
 bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes,
                                         SectionSymbolsTy &Symbols,
                                         const SectionRef &Section) {
@@ -193,6 +222,10 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes,
       return false;
 
     if (ShowDisassembly) {
+      if (ShowPseudoProbe) { // probes for this address precede the instruction
+        ProbeDecoder.printProbeForAddress(outs(),
+                                          Offset + PreferredBaseAddress);
+      }
       outs() << format("%8" PRIx64 ":", Offset);
       size_t Start = outs().tell();
       IPrinter->printInst(&Inst, Offset + Size, "", *STI.get(), outs());
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index add1a22..6f329336 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -10,6 +10,7 @@
 #define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
 
 #include "CallContext.h"
+#include "PseudoProbe.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -128,8 +129,14 @@ class ProfiledBinary {
 
// The symbolizer used to get inline context for an instruction. std::unique_ptr Symbolizer; + + // Pseudo probe decoder + PseudoProbeDecoder ProbeDecoder; + void setPreferredBaseAddress(const ELFObjectFileBase *O); + void decodePseudoProbe(const ELFObjectFileBase *Obj); + // Set up disassembler and related components. void setUpDisassembler(const ELFObjectFileBase *Obj); void setupSymbolizer(); diff --git a/llvm/tools/llvm-profgen/PseudoProbe.cpp b/llvm/tools/llvm-profgen/PseudoProbe.cpp new file mode 100644 index 0000000..7c683315 --- /dev/null +++ b/llvm/tools/llvm-profgen/PseudoProbe.cpp @@ -0,0 +1,297 @@ +//===--- PseudoProbe.cpp - Pseudo probe decoding utilities ------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PseudoProbe.h" +#include "ErrorHandling.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; +using namespace sampleprof; +using namespace support; + +namespace llvm { +namespace sampleprof { + +static StringRef getProbeFNameForGUID(const GUIDProbeFunctionMap &GUID2FuncMAP, + uint64_t GUID) { + auto It = GUID2FuncMAP.find(GUID); + assert(It != GUID2FuncMAP.end() && + "Probe function must exist for a valid GUID"); + return It->second.FuncName; +} + +void PseudoProbeFuncDesc::print(raw_ostream &OS) { + OS << "GUID: " << FuncGUID << " Name: " << FuncName << "\n"; + OS << "Hash: " << FuncHash << "\n"; +} + +void PseudoProbe::getInlineContext(SmallVector &ContextStack, + const GUIDProbeFunctionMap &GUID2FuncMAP, + bool ShowName) const { + uint32_t Begin = ContextStack.size(); + PseudoProbeInlineTree *Cur = InlineTree; + // It will add the string of each node's inline site during 
iteration. + // Note that it won't include the probe's belonging function(leaf location) + while (!Cur->hasInlineSite()) { + std::string ContextStr; + if (ShowName) { + StringRef FuncName = + getProbeFNameForGUID(GUID2FuncMAP, std::get<0>(Cur->ISite)); + ContextStr += FuncName.str(); + } else { + ContextStr += Twine(std::get<0>(Cur->ISite)).str(); + } + ContextStr += ":"; + ContextStr += Twine(std::get<1>(Cur->ISite)).str(); + ContextStack.emplace_back(ContextStr); + Cur = Cur->Parent; + } + // Make the ContextStack in caller-callee order + std::reverse(ContextStack.begin() + Begin, ContextStack.end()); +} + +std::string +PseudoProbe::getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP, + bool ShowName) const { + std::ostringstream OContextStr; + SmallVector ContextStack; + getInlineContext(ContextStack, GUID2FuncMAP, ShowName); + for (auto &CxtStr : ContextStack) { + if (OContextStr.str().size()) + OContextStr << " @ "; + OContextStr << CxtStr; + } + return OContextStr.str(); +} + +static const char *PseudoProbeTypeStr[3] = {"Block", "IndirectCall", + "DirectCall"}; + +void PseudoProbe::print(raw_ostream &OS, + const GUIDProbeFunctionMap &GUID2FuncMAP, + bool ShowName) { + OS << "FUNC: "; + if (ShowName) { + StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, GUID); + OS << FuncName.str() << " "; + } else { + OS << GUID << " "; + } + OS << "Index: " << Index << " "; + OS << "Type: " << PseudoProbeTypeStr[static_cast(Type)] << " "; + if (isDangling()) { + OS << "Dangling "; + } + if (isTailCall()) { + OS << "TailCall "; + } + std::string InlineContextStr = getInlineContextStr(GUID2FuncMAP, ShowName); + if (InlineContextStr.size()) { + OS << "Inlined: @ "; + OS << InlineContextStr; + } + OS << "\n"; +} + +template T PseudoProbeDecoder::readUnencodedNumber() { + if (Data + sizeof(T) > End) { + exitWithError("Decode unencoded number error in " + SectionName + + " section"); + } + T Val = endian::readNext(Data); + return Val; +} + +template T 
PseudoProbeDecoder::readUnsignedNumber() { + unsigned NumBytesRead = 0; + uint64_t Val = decodeULEB128(Data, &NumBytesRead); + if (Val > std::numeric_limits::max() || (Data + NumBytesRead > End)) { + exitWithError("Decode number error in " + SectionName + " section"); + } + Data += NumBytesRead; + return static_cast(Val); +} + +template T PseudoProbeDecoder::readSignedNumber() { + unsigned NumBytesRead = 0; + int64_t Val = decodeSLEB128(Data, &NumBytesRead); + if (Val > std::numeric_limits::max() || (Data + NumBytesRead > End)) { + exitWithError("Decode number error in " + SectionName + " section"); + } + Data += NumBytesRead; + return static_cast(Val); +} + +StringRef PseudoProbeDecoder::readString(uint32_t Size) { + StringRef Str(reinterpret_cast(Data), Size); + if (Data + Size > End) { + exitWithError("Decode string error in " + SectionName + " section"); + } + Data += Size; + return Str; +} + +void PseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start, + std::size_t Size) { + // The pseudo_probe_desc section has a format like: + // .section .pseudo_probe_desc,"",@progbits + // .quad -5182264717993193164 // GUID + // .quad 4294967295 // Hash + // .uleb 3 // Name size + // .ascii "foo" // Name + // .quad -2624081020897602054 + // .quad 174696971957 + // .uleb 34 + // .ascii "main" +#ifndef NDEBUG + SectionName = "pseudo_probe_desc"; +#endif + Data = Start; + End = Data + Size; + + while (Data < End) { + uint64_t GUID = readUnencodedNumber(); + uint64_t Hash = readUnencodedNumber(); + uint32_t NameSize = readUnsignedNumber(); + StringRef Name = readString(NameSize); + + // Initialize PseudoProbeFuncDesc and populate it into GUID2FuncDescMap + GUID2FuncDescMap.emplace(GUID, PseudoProbeFuncDesc(GUID, Hash, Name)); + } + assert(Data == End && "Have unprocessed data in pseudo_probe_desc section"); +} + +void PseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start, + std::size_t Size) { + // The pseudo_probe section encodes an inline forest and each 
tree has a + // format like: + // FUNCTION BODY (one for each uninlined function present in the text + // section) + // GUID (uint64) + // GUID of the function + // NPROBES (ULEB128) + // Number of probes originating from this function. + // NUM_INLINED_FUNCTIONS (ULEB128) + // Number of callees inlined into this function, aka number of + // first-level inlinees + // PROBE RECORDS + // A list of NPROBES entries. Each entry contains: + // INDEX (ULEB128) + // TYPE (uint4) + // 0 - block probe, 1 - indirect call, 2 - direct call + // ATTRIBUTE (uint3) + // 1 - tail call, 2 - dangling + // ADDRESS_TYPE (uint1) + // 0 - code address, 1 - address delta + // CODE_ADDRESS (uint64 or ULEB128) + // code address or address delta, depending on Flag + // INLINED FUNCTION RECORDS + // A list of NUM_INLINED_FUNCTIONS entries describing each of the + // inlined callees. Each record contains: + // INLINE SITE + // GUID of the inlinee (uint64) + // Index of the callsite probe (ULEB128) + // FUNCTION BODY + // A FUNCTION BODY entry describing the inlined function. +#ifndef NDEBUG + SectionName = "pseudo_probe"; +#endif + Data = Start; + End = Data + Size; + + PseudoProbeInlineTree *Root = &DummyInlineRoot; + PseudoProbeInlineTree *Cur = &DummyInlineRoot; + uint64_t LastAddr = 0; + uint32_t Index = 0; + // A DFS-based decoding + while (Data < End) { + // Read inline site for inlinees + if (Root != Cur) { + Index = readUnsignedNumber(); + } + // Switch/add to a new tree node(inlinee) + Cur = Cur->getOrAddNode({Cur->GUID, Index}); + // Read guid + Cur->GUID = readUnencodedNumber(); + // Read number of probes in the current node. + uint32_t NodeCount = readUnsignedNumber(); + // Read number of direct inlinees + Cur->ChildrenToProcess = readUnsignedNumber(); + // Read all probes in this node + for (std::size_t I = 0; I < NodeCount; I++) { + // Read index + uint32_t Index = readUnsignedNumber(); + // Read type | flag. 
+ uint8_t Value = readUnencodedNumber(); + uint8_t Kind = Value & 0xf; + uint8_t Attr = (Value & 0x70) >> 4; + // Read address + uint64_t Addr = 0; + if (Value & 0x80) { + int64_t Offset = readSignedNumber(); + Addr = LastAddr + Offset; + } else { + Addr = readUnencodedNumber(); + } + // Populate Address2ProbesMap + std::vector &ProbeVec = Address2ProbesMap[Addr]; + ProbeVec.emplace_back(Addr, Cur->GUID, Index, PseudoProbeType(Kind), Attr, + Cur); + Cur->addProbes(&ProbeVec.back()); + LastAddr = Addr; + } + + // Look for the parent for the next node by subtracting the current + // node count from tree counts along the parent chain. The first node + // in the chain that has a non-zero tree count is the target. + while (Cur != Root) { + if (Cur->ChildrenToProcess == 0) { + Cur = Cur->Parent; + if (Cur != Root) { + assert(Cur->ChildrenToProcess > 0 && + "Should have some unprocessed nodes"); + Cur->ChildrenToProcess -= 1; + } + } else { + break; + } + } + } + + assert(Data == End && "Have unprocessed data in pseudo_probe section"); + assert(Cur == Root && + " Cur should point to root when the forest is fully built up"); +} + +void PseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) { + OS << "Pseudo Probe Desc:\n"; + // Make the output deterministic + std::map OrderedMap(GUID2FuncDescMap.begin(), + GUID2FuncDescMap.end()); + for (auto &I : OrderedMap) { + I.second.print(OS); + } +} + +void PseudoProbeDecoder::printProbeForAddress(raw_ostream &OS, + uint64_t Address) { + auto It = Address2ProbesMap.find(Address); + if (It != Address2ProbesMap.end()) { + for (auto &Probe : It->second) { + OS << " [Probe]:\t"; + Probe.print(OS, GUID2FuncDescMap, true); + } + } +} + +} // end namespace sampleprof +} // end namespace llvm diff --git a/llvm/tools/llvm-profgen/PseudoProbe.h b/llvm/tools/llvm-profgen/PseudoProbe.h new file mode 100644 index 0000000..8a5f3cf --- /dev/null +++ b/llvm/tools/llvm-profgen/PseudoProbe.h @@ -0,0 +1,209 @@ +//===--- PseudoProbe.h - Pseudo 
probe decoding utilities ---------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TOOLS_LLVM_PROFGEN_PSEUDOPROBE_H +#define LLVM_TOOLS_LLVM_PROFGEN_PSEUDOPROBE_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/PseudoProbe.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/SampleProfileProbe.h" +#include +#include +#include +#include +#include +#include +#include + +namespace llvm { +namespace sampleprof { + +enum PseudoProbeAttributes { TAILCALL = 1, DANGLING = 2 }; + +// Use func GUID and index as the location info of the inline site +using InlineSite = std::tuple; + +struct PseudoProbe; + +// Tree node to represent the inline relation and its inline site, we use a +// dummy root in the PseudoProbeDecoder to lead the tree, the outlined +// function will directly be the children of the dummy root. For the inlined +// function, all the inlinee will be connected to its inlineer, then further to +// its outlined function. Pseudo probes originating from the function stores the +// tree's leaf node which we can process backwards to get its inline context +class PseudoProbeInlineTree { + std::vector ProbeVector; + + struct InlineSiteHash { + uint64_t operator()(const InlineSite &Site) const { + return std::get<0>(Site) ^ std::get<1>(Site); + } + }; + std::unordered_map, + InlineSiteHash> + Children; + +public: + // Inlinee function GUID + uint64_t GUID = 0; + // Inline site to indicate the location in its inliner. 
As the node could also + // be an outlined function, it will use a dummy InlineSite whose GUID and + // Index is 0 connected to the dummy root + InlineSite ISite; + // Used for decoding + uint32_t ChildrenToProcess = 0; + // Caller node of the inline site + PseudoProbeInlineTree *Parent; + + PseudoProbeInlineTree(){}; + PseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; + + PseudoProbeInlineTree *getOrAddNode(const InlineSite &Site) { + auto Ret = + Children.emplace(Site, std::make_unique(Site)); + Ret.first->second->Parent = this; + return Ret.first->second.get(); + } + + void addProbes(PseudoProbe *Probe) { ProbeVector.push_back(Probe); } + // Return false if it's a dummy inline site + bool hasInlineSite() const { return !std::get<0>(ISite); } +}; + +// Function descriptor decoded from .pseudo_probe_desc section +struct PseudoProbeFuncDesc { + uint64_t FuncGUID = 0; + uint64_t FuncHash = 0; + std::string FuncName; + + PseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name) + : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){}; + + void print(raw_ostream &OS); +}; + +// GUID to PseudoProbeFuncDesc map +using GUIDProbeFunctionMap = std::unordered_map; +// Address to pseudo probes map. 
+using AddressProbesMap = std::unordered_map>; + +/* +A pseudo probe has the format like below: + INDEX (ULEB128) + TYPE (uint4) + 0 - block probe, 1 - indirect call, 2 - direct call + ATTRIBUTE (uint3) + 1 - tail call, 2 - dangling + ADDRESS_TYPE (uint1) + 0 - code address, 1 - address delta + CODE_ADDRESS (uint64 or ULEB128) + code address or address delta, depending on Flag +*/ +struct PseudoProbe { + uint64_t Address; + uint64_t GUID; + uint32_t Index; + PseudoProbeType Type; + uint8_t Attribute; + PseudoProbeInlineTree *InlineTree; + const static uint32_t PseudoProbeFirstId = + static_cast(PseudoProbeReservedId::Last) + 1; + + PseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K, + uint8_t At, PseudoProbeInlineTree *Tree) + : Address(Ad), GUID(G), Index(I), Type(K), Attribute(At), + InlineTree(Tree){}; + + bool isEntry() const { return Index == PseudoProbeFirstId; } + + bool isDangling() const { + return Attribute & static_cast(PseudoProbeAttributes::DANGLING); + } + + bool isTailCall() const { + return Attribute & static_cast(PseudoProbeAttributes::TAILCALL); + } + + bool isBlock() const { return Type == PseudoProbeType::Block; } + bool isIndirectCall() const { return Type == PseudoProbeType::IndirectCall; } + bool isDirectCall() const { return Type == PseudoProbeType::DirectCall; } + bool isCall() const { return isIndirectCall() || isDirectCall(); } + + // Get the inlined context by traversing current inline tree backwards, + // each tree node has its InlineSite which is taken as the context. 
+ // \p ContextStack is populated in root to leaf order + void getInlineContext(SmallVector &ContextStack, + const GUIDProbeFunctionMap &GUID2FuncMAP, + bool ShowName) const; + // Helper function to get the string from context stack + std::string getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP, + bool ShowName) const; + // Print pseudo probe while disassembling + void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP, + bool ShowName); +}; + +/* +Decode pseudo probe info from ELF section, used along with ELF reader +Two sections are decoded here: + 1) \fn buildGUID2FunctionMap is responsible for .pseudo_probe_desc + section which encodes all function descriptors. + 2) \fn buildAddress2ProbeMap is responsible for .pseudoprobe section + which encodes an inline function forest and each tree includes its + inlined function and all pseudo probes inside the function. +see \file MCPseudoProbe.h for the details of the section encoding format. +*/ +class PseudoProbeDecoder { + // GUID to PseudoProbeFuncDesc map. + GUIDProbeFunctionMap GUID2FuncDescMap; + + // Address to probes map. + AddressProbesMap Address2ProbesMap; + + // The dummy root of the inline trie, all the outlined function will directly + // be the children of the dummy root, all the inlined function will be the + // children of its inlineer. So the relation would be like: + // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2 + PseudoProbeInlineTree DummyInlineRoot; + + /// Points to the current location in the buffer. + const uint8_t *Data = nullptr; + + /// Points to the end of the buffer. + const uint8_t *End = nullptr; + +#ifndef NDEBUG + /// SectionName used for debug + std::string SectionName; +#endif + + // Decoding helper function + template T readUnencodedNumber(); + template T readUnsignedNumber(); + template T readSignedNumber(); + StringRef readString(uint32_t Size); + +public: + // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map. 
+ void buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size); + + // Decode pseudo_probe section to build address to probes map. + void buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size); + + // Print pseudo_probe_desc section info + void printGUID2FuncDescMap(raw_ostream &OS); + + // Print pseudo_probe section info, used along with show-disassembly + void printProbeForAddress(raw_ostream &OS, uint64_t Address); +}; + +} // end namespace sampleprof +} // end namespace llvm + +#endif -- 2.7.4