From: wlei Date: Thu, 23 Sep 2021 03:00:24 +0000 (-0700) Subject: [AutoFDO][llvm-profgen] Profile generation for LBR(non-CS) sample X-Git-Tag: upstream/15.0.7~30547 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d5f2013004ef8d2d9995fd45a154744bf7c264e9;p=platform%2Fupstream%2Fllvm.git [AutoFDO][llvm-profgen] Profile generation for LBR(non-CS) sample This patch introduces non-CS AutoFDO profile generation into LLVM. The profile is supposed to be well consumed by compiler using `-fprofile-sample-use=[profile]`. After range and branch counters are extracted from the LBR sample, here we go through each addresses for symbolization, create FunctionSamples and populate its sub fields like TotalSamples, BodySamples and HeadSamples etc. For inlined code, as we need to map back to original code, so we always add body samples to the leaf frame's function sample. Reviewed By: wenlei, hoy Differential Revision: https://reviews.llvm.org/D109551 --- diff --git a/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe.perfbin b/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe.perfbin new file mode 100755 index 0000000..7c51eec Binary files /dev/null and b/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe.perfbin differ diff --git a/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe.perfscript b/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe.perfscript new file mode 100644 index 0000000..07e08d0 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe.perfscript @@ -0,0 +1,2 @@ + 400691 0x400691/0x400669/P/-/-/4 0x400691/0x400669/P/-/-/6 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/4 0x400691/0x400669/P/-/-/5 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/4 0x400691/0x400669/P/-/-/5 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/4 0x400691/0x400669/P/-/-/6 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/4 0x400691/0x400669/P/-/-/5 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/4 0x400691/0x400669/P/-/-/5 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/5 0x400691/0x400669/P/-/-/6 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/5 0x400691/0x400669/P/-/-/5 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/4 0x400691/0x400669/P/-/-/7 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/5 0x400691/0x400669/P/-/-/6 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/6 0x400691/0x400669/P/-/-/6 + 400691 0x400691/0x400669/P/-/-/6 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/3 0x400691/0x400669/P/-/-/5 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/3 0x400691/0x400669/P/-/-/5 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/3 0x400691/0x400669/P/-/-/5 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/4 0x400691/0x400669/P/-/-/6 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/4 0x400691/0x400669/P/-/-/5 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/5 0x400691/0x400669/P/-/-/6 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/4 0x400691/0x400669/P/-/-/6 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/3 0x400691/0x400669/P/-/-/5 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/4 0x400691/0x400669/P/-/-/5 0x400677/0x400650/P/-/-/1 0x400691/0x400669/P/-/-/4 0x400691/0x400669/P/-/-/6 0x400677/0x400650/P/-/-/1 diff --git a/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe2.perfbin b/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe2.perfbin new file mode 100755 index 0000000..fc6c563 Binary files /dev/null and b/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe2.perfbin differ diff --git a/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe2.perfscript b/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe2.perfscript new file mode 100644 index 0000000..f6d332f --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/inline-noprobe2.perfscript @@ -0,0 +1,13 @@ + 4008bf 0x4008bf/0x400870/P/-/-/7 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/4 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/2 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 0x400540/0x7f7448e88cb0/P/-/-/1 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/12 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/7 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/6 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 0x400540/0x7f7448e88cb0/P/-/-/1 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/9 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/6 0x7f7448e88840/0x7f7448e8885c/P/-/-/2 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889b0/0x7f7448e88826/P/-/-/11 + 7f7448e889e4 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/4 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/2 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 0x400540/0x7f7448e88cb0/P/-/-/1 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/6 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/4 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/2 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 0x400540/0x7f7448e88cb0/P/-/-/1 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/7 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/4 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/2 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 + 400788 0x400788/0x4007a8/P/-/-/22 0x400753/0x400770/M/-/-/1 0x40075b/0x400743/P/-/-/3 0x40075b/0x400743/P/-/-/20 0x40075b/0x400743/P/-/-/3 0x40075b/0x400743/P/-/-/3 0x40075b/0x400743/P/-/-/20 0x40075b/0x400743/P/-/-/16 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/1 0x40075b/0x400743/P/-/-/1 0x40072f/0x400755/P/-/-/3 0x4007a6/0x400710/P/-/-/3 0x4007b8/0x400790/P/-/-/2 0x400788/0x4007a8/P/-/-/24 0x400753/0x400770/M/-/-/1 0x40076e/0x400740/P/-/-/27 0x40075b/0x400743/P/-/-/19 0x40075b/0x400743/M/-/-/2 0x40076e/0x400740/P/-/-/8 0x40076e/0x400740/P/-/-/26 0x40075b/0x400743/M/-/-/28 0x40075b/0x400743/M/-/-/2 0x40076e/0x400740/P/-/-/3 0x40076e/0x400740/P/-/-/4 0x40076e/0x400740/P/-/-/27 0x40076e/0x400740/P/-/-/25 0x40076e/0x400740/P/-/-/46 0x40075b/0x400743/P/-/-/19 0x40076e/0x400740/P/-/-/65 0x40075b/0x400743/M/-/-/7 0x40075f/0x400740/P/-/-/17 + 4007ca 0x4007ca/0x400790/P/-/-/3 0x4007d7/0x4007bd/P/-/-/3 0x4007d7/0x4007cf/P/-/-/3 0x4007d7/0x4007cf/P/-/-/1 0x400792/0x4007d7/P/-/-/1 0x4007ca/0x400790/P/-/-/2 0x4007d7/0x4007bd/P/-/-/1 0x400792/0x4007d7/P/-/-/1 0x4007b8/0x400790/P/-/-/2 0x400788/0x4007a8/P/-/-/30 0x400753/0x400770/M/-/-/16 0x40075b/0x400743/M/-/-/1 0x40072f/0x400755/P/-/-/3 0x4007a6/0x400710/P/-/-/27 0x4007ca/0x400790/P/-/-/3 0x4007d7/0x4007bd/P/-/-/3 0x4007d7/0x4007cf/P/-/-/1 0x400792/0x4007d7/P/-/-/1 0x4007ca/0x400790/P/-/-/2 0x4007d7/0x4007bd/P/-/-/1 0x400792/0x4007d7/P/-/-/1 0x4007b8/0x400790/P/-/-/2 0x400788/0x4007a8/P/-/-/29 0x400753/0x400770/M/-/-/4 0x40075f/0x400740/P/-/-/14 0x40072f/0x400755/P/-/-/5 0x4007a6/0x400710/P/-/-/22 0x4007b8/0x400790/P/-/-/2 0x400788/0x4007a8/P/-/-/26 0x400753/0x400770/M/-/-/1 0x40075b/0x400743/P/-/-/24 0x40075b/0x400743/M/-/-/7 + 400700 0x400700/0x4006b0/P/-/-/47 0x4006bf/0x4006d0/M/-/-/1 0x4006c8/0x4006b0/P/-/-/2 0x4006ea/0x4006b0/P/-/-/26 0x4006bf/0x4006d0/M/-/-/3 0x4006c8/0x4006b0/P/-/-/25 0x4006c8/0x4006b0/P/-/-/44 0x4006ea/0x4006b0/M/-/-/26 0x4006bf/0x4006d0/M/-/-/3 0x4006c8/0x4006b0/P/-/-/23 0x400700/0x4006b0/P/-/-/9 0x4006ca/0x4006ec/P/-/-/39 0x400700/0x4006b0/P/-/-/9 0x4006ca/0x4006ec/P/-/-/37 0x4006ea/0x4006b0/M/-/-/6 0x4006bf/0x4006d0/P/-/-/3 0x4006c8/0x4006b0/P/-/-/23 0x400700/0x4006b0/P/-/-/27 0x4006bf/0x4006d0/P/-/-/1 0x400700/0x4006b0/P/-/-/9 0x4006ca/0x4006ec/P/-/-/40 0x4006ea/0x4006b0/P/-/-/2 0x4006bf/0x4006d0/P/-/-/1 0x4006ea/0x4006b0/P/-/-/25 0x4006bf/0x4006d0/M/-/-/6 0x400689/0x4006b9/P/-/-/10 0x40066d/0x400686/M/-/-/2 0x4007a6/0x400650/P/-/-/3 0x4007ca/0x400790/P/-/-/9 0x4007d7/0x4007bd/P/-/-/3 0x4007d7/0x4007cf/P/-/-/3 0x4007d7/0x4007cf/P/-/-/3 + 4007a6 0x4007a6/0x400650/M/-/-/3 0x4007ca/0x400790/P/-/-/1 0x4007d7/0x4007bd/P/-/-/3 0x4007d7/0x4007cf/P/-/-/2 0x4007d7/0x4007cf/P/-/-/4 0x4007d7/0x4007cf/P/-/-/1 0x4007d7/0x4007cf/P/-/-/2 0x4007d7/0x4007cf/P/-/-/1 0x4007d7/0x4007cf/P/-/-/1 0x400792/0x4007d7/P/-/-/1 0x4007ca/0x400790/P/-/-/1 0x4007d7/0x4007bd/P/-/-/1 0x400792/0x4007d7/P/-/-/1 0x4007b8/0x400790/P/-/-/2 0x4006a2/0x4007a8/P/-/-/4 0x400675/0x400682/P/-/-/3 0x4007a6/0x400650/P/-/-/9 0x4007ca/0x400790/P/-/-/1 0x4007d7/0x4007bd/P/-/-/1 0x400792/0x4007d7/P/-/-/1 0x4007b8/0x400790/P/-/-/2 0x4006a2/0x4007a8/P/-/-/1 0x4006b7/0x40068b/P/-/-/1 0x4006c8/0x4006b0/P/-/-/1 0x400689/0x4006b9/P/-/-/1 0x40066d/0x400686/P/-/-/2 0x4007a6/0x400650/P/-/-/9 0x4007ca/0x400790/P/-/-/8 0x4007d7/0x4007bd/P/-/-/1 0x400792/0x4007d7/P/-/-/1 0x4007b8/0x400790/P/-/-/2 0x4006a2/0x4007a8/P/-/-/3 + 40076e 0x40076e/0x400740/P/-/-/25 0x40075b/0x400743/M/-/-/2 0x40076e/0x400740/P/-/-/5 0x40076e/0x400740/P/-/-/26 0x40076e/0x400740/P/-/-/24 0x40075b/0x400743/P/-/-/1 0x40072f/0x400755/P/-/-/2 0x4007a6/0x400710/P/-/-/4 0x4007b8/0x400790/P/-/-/2 0x400788/0x4007a8/P/-/-/23 0x400753/0x400770/M/-/-/2 0x40076e/0x400740/P/-/-/3 0x40076e/0x400740/P/-/-/4 0x40076e/0x400740/P/-/-/5 0x40076e/0x400740/P/-/-/4 0x40076e/0x400740/P/-/-/26 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40076e/0x400740/P/-/-/25 0x40076e/0x400740/P/-/-/25 0x40076e/0x400740/P/-/-/4 0x40075b/0x400743/P/-/-/1 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40076e/0x400740/P/-/-/49 0x40075b/0x400743/M/-/-/22 0x40075b/0x400743/M/-/-/2 0x40076e/0x400740/P/-/-/25 0x40076e/0x400740/P/-/-/24 0x40075b/0x400743/P/-/-/2 0x40076e/0x400740/P/-/-/48 0x40075b/0x400743/M/-/-/2 + 400870 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/7 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/4 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/2 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 0x400540/0x7f7448e88cb0/P/-/-/1 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/5 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/4 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/2 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 0x400540/0x7f7448e88cb0/P/-/-/1 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/6 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 + 4007d7 0x4007d7/0x4007cf/P/-/-/19 0x400792/0x4007d7/M/-/-/1 0x4007ca/0x400790/P/-/-/3 0x4007d7/0x4007bd/P/-/-/2 0x4007d7/0x4007cf/P/-/-/2 0x400792/0x4007d7/P/-/-/1 0x4007ca/0x400790/P/-/-/1 0x4007d7/0x4007bd/P/-/-/1 0x400792/0x4007d7/P/-/-/1 0x4007b8/0x400790/P/-/-/2 0x4006a2/0x4007a8/P/-/-/27 0x4007a6/0x400650/P/-/-/3 0x4007b8/0x400790/P/-/-/2 0x4006a2/0x4007a8/P/-/-/24 0x4006b7/0x40068b/M/-/-/1 0x400700/0x4006b0/P/-/-/48 0x4006bf/0x4006d0/M/-/-/6 0x400689/0x4006b9/P/-/-/8 0x40066d/0x400686/M/-/-/2 0x4007a6/0x400650/P/-/-/3 0x4007b8/0x400790/P/-/-/2 0x4006a2/0x4007a8/P/-/-/30 0x4006b7/0x40068b/M/-/-/19 0x4006c8/0x4006b0/M/-/-/20 0x400700/0x4006b0/P/-/-/6 0x4006bf/0x4006d0/P/-/-/2 0x400689/0x4006b9/P/-/-/21 0x4007a6/0x400650/P/-/-/10 0x4007ca/0x400790/P/-/-/7 0x4007d7/0x4007bd/P/-/-/1 0x400792/0x4007d7/P/-/-/1 0x4007b8/0x400790/P/-/-/2 + 400870 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/7 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/4 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/2 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 0x400540/0x7f7448e88cb0/P/-/-/1 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/7 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/4 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/2 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 0x400540/0x7f7448e88cb0/P/-/-/1 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/13 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 + 4008bf 0x4008bf/0x400870/P/-/-/6 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/4 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/2 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 0x400540/0x7f7448e88cb0/P/-/-/1 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/7 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/4 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/2 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 0x400540/0x7f7448e88cb0/P/-/-/1 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/7 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 + 40075b 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/1 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/1 0x40075b/0x400743/P/-/-/3 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/3 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/2 0x40075b/0x400743/P/-/-/3 0x40075b/0x400743/P/-/-/19 0x40075b/0x400743/P/-/-/2 0x40072f/0x400755/P/-/-/13 0x4007a6/0x400710/M/-/-/12 0x40082f/0x400790/P/-/-/7 + 4008bf 0x4008bf/0x400870/P/-/-/6 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/4 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/2 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 0x400540/0x7f7448e88cb0/P/-/-/1 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/7 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 0x7f7448e8899f/0x7f7448e889d8/P/-/-/4 0x7f7448e88821/0x7f7448e88960/P/-/-/1 0x7f7448e887f9/0x7f7448e88815/P/-/-/2 0x7f7448e887e4/0x7f7448e887f2/P/-/-/2 0x7f7448e88cb8/0x7f7448e887c0/P/-/-/1 0x400540/0x7f7448e88cb0/P/-/-/1 0x400870/0x400540/P/-/-/1 0x4008bf/0x400870/P/-/-/7 0x7f7448e88cc1/0x400875/P/-/-/1 0x7f7448e88875/0x7f7448e88cbd/P/-/-/2 0x7f7448e88840/0x7f7448e8885c/P/-/-/1 0x7f7448e8882d/0x7f7448e8883a/P/-/-/1 0x7f7448e889e4/0x7f7448e88826/P/-/-/1 diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe.test b/llvm/test/tools/llvm-profgen/inline-noprobe.test new file mode 100644 index 0000000..39568aa --- /dev/null +++ b/llvm/test/tools/llvm-profgen/inline-noprobe.test @@ -0,0 +1,45 @@ +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-noprobe.perfscript --binary=%S/Inputs/inline-noprobe.perfbin --output=%t --skip-symbolization +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-noprobe.perfscript --binary=%S/Inputs/inline-noprobe.perfbin --output=%t +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK + +CHECK: main:836:0 +CHECK: 1: foo:836 +CHECK: 2.1: 42 +CHECK: 3: 62 +CHECK: 3.2: 21 +CHECK: 3.1: bar:252 +CHECK: 1: 42 +CHECK: 3.2: bar:63 +CHECK: 1: 21 + +CHECK-RAW-PROFILE: 3 +CHECK-RAW-PROFILE-NEXT: 650-691:21 +CHECK-RAW-PROFILE-NEXT: 669-677:20 +CHECK-RAW-PROFILE-NEXT: 669-691:21 +CHECK-RAW-PROFILE-NEXT: 2 +CHECK-RAW-PROFILE-NEXT: 677->650:21 +CHECK-RAW-PROFILE-NEXT: 691->669:43 + +; original code: +; clang -O3 -g -fdebug-info-for-profiling test.c -o a.out +#include + +int bar(int x, int y) { + if (x % 2) { + return x - y; + } + return x + y; +} + +void foo() { + int s, i = 0; + while (i++ < 4000 * 4000) + if (i % 3) s = bar(i, s); else s += bar(s, i); + printf("sum is %d\n", s); +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test new file mode 100644 index 0000000..9b4e643 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test @@ -0,0 +1,109 @@ + +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-noprobe2.perfscript --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK + +;CHECK: partition_pivot_first:1050:5 +;CHECK-NEXT: 0: 5 +;CHECK-NEXT: 1: 5 +;CHECK-NEXT: 2: 5 +;CHECK-NEXT: 3: 5 +;CHECK-NEXT: 3.1: 83 +;CHECK-NEXT: 4: 82 +;CHECK-NEXT: 4.1: 26 +;CHECK-NEXT: 4.2: 25 +;CHECK-NEXT: 4.3: 26 +;CHECK-NEXT: 5: 6 +;CHECK-NEXT: 6: 6 +;CHECK-NEXT: 4.2: swap:100 +;CHECK-NEXT: 1: 25 +;CHECK-NEXT: 2: 25 +;CHECK-NEXT: 3: 25 +;CHECK-NEXT: 5: swap:24 +;CHECK-NEXT: 1: 6 +;CHECK-NEXT: 2: 6 +;CHECK-NEXT: 3: 6 +;CHECK-NEXT: quick_sort:414:25 +;CHECK-NEXT: 1: 24 +;CHECK-NEXT: 2: 12 partition_pivot_last:7 partition_pivot_first:5 +;CHECK-NEXT: 3: 11 quick_sort:12 +;CHECK-NEXT: 4: 12 quick_sort:12 +;CHECK-NEXT: 6: 24 +;CHECK-NEXT: partition_pivot_last:391:7 +;CHECK-NEXT: 1: 6 +;CHECK-NEXT: 2: 6 +;CHECK-NEXT: 3: 6 +;CHECK-NEXT: 3.1: 18 +;CHECK-NEXT: 3.3: 18 +;CHECK-NEXT: 4: 19 +;CHECK-NEXT: 5: 9 +;CHECK-NEXT: 6: 5 +;CHECK-NEXT: 7: 5 +;CHECK-NEXT: 5: swap:61 +;CHECK-NEXT: 1: 9 +;CHECK-NEXT: 2: 9 +;CHECK-NEXT: 3: 9 +;CHECK-NEXT: 6: swap:20 +;CHECK-NEXT: 1: 5 +;CHECK-NEXT: 2: 5 +;CHECK-NEXT: 3: 5 +;CHECK-NEXT: main:213:0 +;CHECK-NEXT: 5.1: 10 +;CHECK-NEXT: 5.3: 10 +;CHECK-NEXT: 6: 10 +;CHECK-NEXT: 6.1: 12 +;CHECK-NEXT: 6.3: 10 +;CHECK-NEXT: 8: 0 quick_sort:1 + +; original code: +; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out +#include +#include + +void swap(int *a, int *b) { + int t = *a; + *a = *b; + *b = t; +} + +int partition_pivot_last(int* array, int low, int high) { + int pivot = array[high]; + int i = low - 1; + for (int j = low; j < high; j++) + if (array[j] < pivot) + swap(&array[++i], &array[j]); + swap(&array[i + 1], &array[high]); + return (i + 1); +} + +int partition_pivot_first(int* array, int low, int high) { + int pivot = array[low]; + int i = low + 1; + for (int j = low + 1; j <= high; j++) + if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;} + swap(&array[i - 1], &array[low]); + return i - 1; +} + +void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) { + if (low < high) { + int pi = (*partition_func)(array, low, high); + quick_sort(array, low, pi - 1, partition_func); + quick_sort(array, pi + 1, high, partition_func); + } +} + +int main() { + const int size = 200; + int sum = 0; + int *array = malloc(size * sizeof(int)); + for(int i = 0; i < 100 * 1000; i++) { + for(int j = 0; j < size; j++) + array[j] = j % 10 ? rand() % size: j; + int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first; + quick_sort(array, 0, size - 1, fptr); + sum += array[i % size]; + } + printf("sum=%d\n", sum); + + return 0; +} diff --git a/llvm/test/tools/llvm-profgen/noprobe.test b/llvm/test/tools/llvm-profgen/noinline-noprobe.test similarity index 83% rename from llvm/test/tools/llvm-profgen/noprobe.test rename to llvm/test/tools/llvm-profgen/noinline-noprobe.test index fb705ba..b4cc547 100644 --- a/llvm/test/tools/llvm-profgen/noprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-noprobe.test @@ -4,6 +4,18 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.aggperfscript --binary=%S/Inputs/noprobe.perfbin --output=%t --skip-symbolization ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE-AGG +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK + +CHECK: foo:360:0 +CHECK: 2: 19 +CHECK: 3: 19 bar:21 +CHECK: bar:324:21 +CHECK: 0: 20 +CHECK: 1: 20 +CHECK: 2: 13 +CHECK: 4: 6 +CHECK: 5: 18 CHECK-RAW-PROFILE: 7 CHECK-RAW-PROFILE-NEXT: 5b0-5c8:7 @@ -40,7 +52,7 @@ CHECK-RAW-PROFILE-AGG-NEXT: 645->5ff:31 ; original code: -; clang -O3 -g -debug-info-for-profiling test.c -o a.out +; clang -O3 -g -fdebug-info-for-profiling test.c -fno-inline -o a.out #include int bar(int x, int y) { diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 3d3abda..a71282a 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -74,33 +74,30 @@ int32_t CSProfileGenerator::MaxCompressionSize = -1; int CSProfileGenerator::MaxContextDepth = -1; -std::unique_ptr -ProfileGenerator::create(ProfiledBinary *Binary, - const ContextSampleCounterMap &SampleCounters, - enum PerfScriptType SampleType) { - std::unique_ptr ProfileGenerator; - if (SampleType == PERF_LBR_STACK) { - if (Binary->usePseudoProbes()) { - ProfileGenerator.reset( - new PseudoProbeCSProfileGenerator(Binary, SampleCounters)); - } else { - ProfileGenerator.reset(new CSProfileGenerator(Binary, SampleCounters)); - } +std::unique_ptr +ProfileGeneratorBase::create(ProfiledBinary *Binary, + const ContextSampleCounterMap &SampleCounters, + enum PerfScriptType SampleType) { + std::unique_ptr Generator; + if (SampleType == PERF_LBR) { + // TODO: Support probe based profile generation + Generator.reset(new ProfileGenerator(Binary, SampleCounters)); + } else if (SampleType == PERF_LBR_STACK) { + Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); } else { - // TODO: llvm_unreachable("Unsupported perfscript!"); } - return ProfileGenerator; + return Generator; } -void ProfileGenerator::write(std::unique_ptr Writer, - SampleProfileMap &ProfileMap) { +void ProfileGeneratorBase::write(std::unique_ptr Writer, + SampleProfileMap &ProfileMap) { if (std::error_code EC = Writer->write(ProfileMap)) exitWithError(std::move(EC)); } -void ProfileGenerator::write() { +void ProfileGeneratorBase::write() { auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); if (std::error_code EC = WriterOrErr.getError()) exitWithError(EC, OutputFilename); @@ -116,8 +113,8 @@ void ProfileGenerator::write() { write(std::move(WriterOrErr.get()), ProfileMap); } -void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges, - const RangeSample &Ranges) { +void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, + const RangeSample &Ranges) { /* Regions may overlap with each other. Using the boundary info, find all @@ -216,6 +213,137 @@ void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges, } } +void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( + FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, + uint64_t Count) { + // Filter out invalid negative(int type) lineOffset + if (LeafLoc.Callsite.LineOffset & 0x80000000) + return; + // Use the maximum count of samples with same line location + ErrorOr R = FunctionProfile.findSamplesAt( + LeafLoc.Callsite.LineOffset, LeafLoc.Callsite.Discriminator); + uint64_t PreviousCount = R ? R.get() : 0; + if (PreviousCount < Count) { + FunctionProfile.addBodySamples(LeafLoc.Callsite.LineOffset, + LeafLoc.Callsite.Discriminator, + Count - PreviousCount); + } +} + +FunctionSamples & +ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { + SampleContext Context(FuncName); + auto Ret = ProfileMap.emplace(Context, FunctionSamples()); + if (Ret.second) { + FunctionSamples &FProfile = Ret.first->second; + FProfile.setContext(Context); + } + return Ret.first->second; +} + +void ProfileGenerator::generateProfile() { + if (Binary->usePseudoProbes()) { + // TODO: Support probe based profile generation + } else { + generateLineNumBasedProfile(); + } +} + +void ProfileGenerator::generateLineNumBasedProfile() { + assert(SampleCounters.size() == 1 && + "Must have one entry for profile generation."); + const SampleCounter &SC = SampleCounters.begin()->second; + // Fill in function body samples + populateBodySamplesForAllFunctions(SC.RangeCounter); + // Fill in boundary sample counts as well as call site samples for calls + populateBoundarySamplesForAllFunctions(SC.BranchCounter); +} + +FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( + const SampleContextFrameVector &FrameVec, uint64_t Count) { + // Get top level profile + FunctionSamples *FunctionProfile = + &getTopLevelFunctionProfile(FrameVec[0].CallerName); + FunctionProfile->addTotalSamples(Count); + + for (size_t I = 1; I < FrameVec.size(); I++) { + FunctionSamplesMap &SamplesMap = + FunctionProfile->functionSamplesAt(FrameVec[I - 1].Callsite); + auto Ret = + SamplesMap.emplace(FrameVec[I].CallerName.str(), FunctionSamples()); + if (Ret.second) { + SampleContext Context(FrameVec[I].CallerName); + Ret.first->second.setContext(Context); + } + FunctionProfile = &Ret.first->second; + FunctionProfile->addTotalSamples(Count); + } + + return *FunctionProfile; +} + +void ProfileGenerator::populateBodySamplesForAllFunctions( + const RangeSample &RangeCounter) { + RangeSample Ranges; + findDisjointRanges(Ranges, RangeCounter); + for (auto Range : Ranges) { + uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); + uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); + uint64_t Count = Range.second; + // Disjoint ranges have introduce zero-filled gap that + // doesn't belong to current context, filter them out. + if (Count == 0) + continue; + + InstructionPointer IP(Binary, RangeBegin, true); + // Disjoint ranges may have range in the middle of two instr, + // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range + // can be Addr1+1 to Addr2-1. We should ignore such range. + while (IP.Address <= RangeEnd) { + uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); + const SampleContextFrameVector &FrameVec = + Binary->getFrameLocationStack(Offset); + if (!FrameVec.empty()) { + FunctionSamples &FunctionProfile = + getLeafProfileAndAddTotalSamples(FrameVec, Count); + updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), + Count); + } + // Move to next IP within the range. + IP.advance(); + } + } +} + +void ProfileGenerator::populateBoundarySamplesForAllFunctions( + const BranchSample &BranchCounters) { + for (auto Entry : BranchCounters) { + uint64_t SourceOffset = Entry.first.first; + uint64_t TargetOffset = Entry.first.second; + uint64_t Count = Entry.second; + assert(Count != 0 && "Unexpected zero weight branch"); + + // Get the callee name by branch target if it's a call branch. + StringRef CalleeName = FunctionSamples::getCanonicalFnName( + Binary->getFuncFromStartOffset(TargetOffset)); + if (CalleeName.size() == 0) + continue; + // Record called target sample and its count. + const SampleContextFrameVector &FrameVec = + Binary->getFrameLocationStack(SourceOffset); + if (!FrameVec.empty()) { + FunctionSamples &FunctionProfile = + getLeafProfileAndAddTotalSamples(FrameVec, Count); + FunctionProfile.addCalledTargetSamples( + FrameVec.back().Callsite.LineOffset, + FrameVec.back().Callsite.Discriminator, CalleeName, Count); + } + // Add head samples for callee. + FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); + CalleeProfile.addHeadSamples(Count); + } +} + FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( const SampleContextFrameVector &Context, bool WasLeafInlined) { auto I = ProfileMap.find(SampleContext(Context)); @@ -235,6 +363,17 @@ FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( void CSProfileGenerator::generateProfile() { FunctionSamples::ProfileIsCS = true; + if (Binary->usePseudoProbes()) { + // Enable pseudo probe functionalities in SampleProf + FunctionSamples::ProfileIsProbeBased = true; + generateProbeBasedProfile(); + } else { + generateLineNumBasedProfile(); + } + postProcessProfiles(); +} + +void CSProfileGenerator::generateLineNumBasedProfile() { for (const auto &CI : SampleCounters) { const StringBasedCtxKey *CtxKey = dyn_cast(CI.first.getPtr()); @@ -243,38 +382,19 @@ void CSProfileGenerator::generateProfile() { getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined); // Fill in function body samples - populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter); + populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter); // Fill in boundary sample counts as well as call site samples for calls - populateFunctionBoundarySamples(CtxKey->Context, FunctionProfile, - CI.second.BranchCounter); + populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, + CI.second.BranchCounter); } // Fill in call site value sample for inlined calls and also use context to // infer missing samples. Since we don't have call count for inlined // functions, we estimate it from inlinee's profile using the entry of the // body sample. populateInferredFunctionSamples(); - - postProcessProfiles(); -} - -void CSProfileGenerator::updateBodySamplesforFunctionProfile( - FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, - uint64_t Count) { - // Filter out invalid negative(int type) lineOffset - if (LeafLoc.Callsite.LineOffset & 0x80000000) - return; - // Use the maximum count of samples with same line location - ErrorOr R = FunctionProfile.findSamplesAt( - LeafLoc.Callsite.LineOffset, LeafLoc.Callsite.Discriminator); - uint64_t PreviousCount = R ? R.get() : 0; - if (PreviousCount < Count) { - FunctionProfile.addBodySamples(LeafLoc.Callsite.LineOffset, - LeafLoc.Callsite.Discriminator, - Count - PreviousCount); - } } -void CSProfileGenerator::populateFunctionBodySamples( +void CSProfileGenerator::populateBodySamplesForFunction( FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { // Compute disjoint ranges first, so we can use MAX // for calculating count for each location. @@ -290,13 +410,9 @@ void CSProfileGenerator::populateFunctionBodySamples( continue; InstructionPointer IP(Binary, RangeBegin, true); - // Disjoint ranges may have range in the middle of two instr, // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range // can be Addr1+1 to Addr2-1. We should ignore such range. - if (IP.Address > RangeEnd) - continue; - while (IP.Address <= RangeEnd) { uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); @@ -312,7 +428,7 @@ void CSProfileGenerator::populateFunctionBodySamples( } } -void CSProfileGenerator::populateFunctionBoundarySamples( +void CSProfileGenerator::populateBoundarySamplesForFunction( SampleContextFrames ContextId, FunctionSamples &FunctionProfile, const BranchSample &BranchCounters) { @@ -320,6 +436,8 @@ void CSProfileGenerator::populateFunctionBoundarySamples( uint64_t SourceOffset = Entry.first.first; uint64_t TargetOffset = Entry.first.second; uint64_t Count = Entry.second; + assert(Count != 0 && "Unexpected zero weight branch"); + // Get the callee name by branch target if it's a call branch StringRef CalleeName = FunctionSamples::getCanonicalFnName( Binary->getFuncFromStartOffset(TargetOffset)); @@ -341,7 +459,6 @@ void CSProfileGenerator::populateFunctionBoundarySamples( CalleeCtx.back() = *LeafLoc; CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); - assert(Count != 0 && "Unexpected zero weight branch"); CalleeProfile.addHeadSamples(Count); } } @@ -434,12 +551,6 @@ void CSProfileGenerator::computeSummaryAndThreshold() { (Summary->getDetailedSummary())); } -void CSProfileGenerator::write(std::unique_ptr Writer, - SampleProfileMap &ProfileMap) { - if (std::error_code EC = Writer->write(ProfileMap)) - exitWithError(std::move(EC)); -} - // Helper function to extract context prefix string stack // Extract context stack for reusing, leaf context stack will // be added compressed while looking up function profile @@ -452,10 +563,7 @@ static void extractPrefixContextStack( } } -void PseudoProbeCSProfileGenerator::generateProfile() { - // Enable pseudo probe functionalities in SampleProf - FunctionSamples::ProfileIsProbeBased = true; - FunctionSamples::ProfileIsCS = true; +void CSProfileGenerator::generateProbeBasedProfile() { for (const auto &CI : SampleCounters) { const ProbeBasedCtxKey *CtxKey = dyn_cast(CI.first.getPtr()); @@ -467,12 +575,10 @@ void PseudoProbeCSProfileGenerator::generateProfile() { // Fill in boundary samples for a call probe populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); } - - postProcessProfiles(); } -void PseudoProbeCSProfileGenerator::extractProbesFromRange( - const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter) { +void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter, + ProbeCounterMap &ProbeCounter) { RangeSample Ranges; findDisjointRanges(Ranges, RangeCounter); for (const auto &Range : Ranges) { @@ -509,7 +615,7 @@ void PseudoProbeCSProfileGenerator::extractProbesFromRange( } } -void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( +void CSProfileGenerator::populateBodySamplesWithProbes( const RangeSample &RangeCounter, SampleContextFrames ContextStack) { ProbeCounterMap ProbeCounter; // Extract the top frame probes by looking up each address among the range in @@ -568,7 +674,7 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( } } -void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( +void CSProfileGenerator::populateBoundarySamplesWithProbes( const BranchSample &BranchCounter, SampleContextFrames ContextStack) { for (auto BI : BranchCounter) { uint64_t SourceOffset = BI.first.first; @@ -592,7 +698,7 @@ void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( } } -FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( +FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { // Explicitly copy the context for appending the leaf context diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h index 96b3675..681eac4 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -22,21 +22,26 @@ using namespace sampleprof; namespace llvm { namespace sampleprof { -class ProfileGenerator { +// This base class for profile generation of sample-based PGO. We reuse all +// structures relating to function profiles and profile writers as seen in +// /ProfileData/SampleProf.h. +class ProfileGeneratorBase { public: - ProfileGenerator(ProfiledBinary *B) : Binary(B){}; - virtual ~ProfileGenerator() = default; - static std::unique_ptr + ProfileGeneratorBase(ProfiledBinary *Binary, + const ContextSampleCounterMap &Counters) + : Binary(Binary), SampleCounters(Counters){}; + virtual ~ProfileGeneratorBase() = default; + static std::unique_ptr create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters, enum PerfScriptType SampleType); virtual void generateProfile() = 0; - // Use SampleProfileWriter to serialize profile map - virtual void write(std::unique_ptr Writer, - SampleProfileMap &ProfileMap); void write(); protected: + // Use SampleProfileWriter to serialize profile map + void write(std::unique_ptr Writer, + SampleProfileMap &ProfileMap); /* For each region boundary point, mark if it is begin or end (or both) of the region. Boundary points are inclusive. Log the sample count as well @@ -54,23 +59,51 @@ protected: */ void findDisjointRanges(RangeSample &DisjointRanges, const RangeSample &Ranges); + // Helper function for updating body sample for a leaf location in + // FunctionProfile + void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, + const SampleContextFrame &LeafLoc, + uint64_t Count); // Used by SampleProfileWriter SampleProfileMap ProfileMap; ProfiledBinary *Binary = nullptr; -}; -class CSProfileGenerator : public ProfileGenerator { -protected: const ContextSampleCounterMap &SampleCounters; +}; + +class ProfileGenerator : public ProfileGeneratorBase { + +public: + ProfileGenerator(ProfiledBinary *Binary, + const ContextSampleCounterMap &Counters) + : ProfileGeneratorBase(Binary, Counters){}; + void generateProfile() override; + +private: + void generateLineNumBasedProfile(); + FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName); + // Helper function to get the leaf frame's FunctionProfile by traversing the + // inline stack and meanwhile it adds the total samples for each frame's + // function profile. + FunctionSamples & + getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec, + uint64_t Count); + void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter); + void + populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters); +}; + +using ProbeCounterMap = + std::unordered_map; +class CSProfileGenerator : public ProfileGeneratorBase { public: CSProfileGenerator(ProfiledBinary *Binary, const ContextSampleCounterMap &Counters) - : ProfileGenerator(Binary), SampleCounters(Counters){}; + : ProfileGeneratorBase(Binary, Counters){}; -public: void generateProfile() override; // Trim the context stack at a given depth. @@ -188,7 +221,8 @@ public: } } -protected: +private: + void generateLineNumBasedProfile(); // Lookup or create FunctionSamples for the context FunctionSamples & getFunctionProfileForContext(const SampleContextFrameVector &Context, @@ -196,49 +230,17 @@ protected: // Post processing for profiles before writing out, such as mermining // and trimming cold profiles, running preinliner on profiles. void postProcessProfiles(); - void computeSummaryAndThreshold(); - void write(std::unique_ptr Writer, - SampleProfileMap &ProfileMap) override; - - // Thresholds from profile summary to answer isHotCount/isColdCount queries. - uint64_t HotCountThreshold; - uint64_t ColdCountThreshold; - // Underlying context table serves for sample profile writer. - std::unordered_set Contexts; + void computeSummaryAndThreshold(); -private: - // Helper function for updating body sample for a leaf location in - // FunctionProfile - void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, - const SampleContextFrame &LeafLoc, - uint64_t Count); - void populateFunctionBodySamples(FunctionSamples &FunctionProfile, - const RangeSample &RangeCounters); - void populateFunctionBoundarySamples(SampleContextFrames ContextId, - FunctionSamples &FunctionProfile, - const BranchSample &BranchCounters); + void populateBodySamplesForFunction(FunctionSamples &FunctionProfile, + const RangeSample &RangeCounters); + void populateBoundarySamplesForFunction(SampleContextFrames ContextId, + FunctionSamples &FunctionProfile, + const BranchSample &BranchCounters); void populateInferredFunctionSamples(); -public: - // Deduplicate adjacent repeated context sequences up to a given sequence - // length. -1 means no size limit. - static int32_t MaxCompressionSize; - static int MaxContextDepth; -}; - -using ProbeCounterMap = - std::unordered_map; - -class PseudoProbeCSProfileGenerator : public CSProfileGenerator { - -public: - PseudoProbeCSProfileGenerator(ProfiledBinary *Binary, - const ContextSampleCounterMap &Counters) - : CSProfileGenerator(Binary, Counters) {} - void generateProfile() override; - -private: + void generateProbeBasedProfile(); // Go through each address from range to extract the top frame probe by // looking up in the Address2ProbeMap void extractProbesFromRange(const RangeSample &RangeCounter, @@ -253,6 +255,18 @@ private: FunctionSamples & getFunctionProfileForLeafProbe(SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe); + // Thresholds from profile summary to answer isHotCount/isColdCount queries. + uint64_t HotCountThreshold; + uint64_t ColdCountThreshold; + + // Underlying context table serves for sample profile writer. + std::unordered_set Contexts; + +public: + // Deduplicate adjacent repeated context sequences up to a given sequence + // length. -1 means no size limit. + static int32_t MaxCompressionSize; + static int MaxContextDepth; }; } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index 9e4d11b..85f0791 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -232,12 +232,6 @@ class ProfiledBinary { /// 3. Pseudo probe related sections, used by probe-based profile /// generation. void load(); - const SampleContextFrameVector &getFrameLocationStack(uint64_t Offset) const { - auto I = Offset2LocStackMap.find(Offset); - assert(I != Offset2LocStackMap.end() && - "Can't find location for offset in the binary"); - return I->second; - } public: ProfiledBinary(const StringRef Path) @@ -310,13 +304,23 @@ public: } StringRef getFuncFromStartOffset(uint64_t Offset) { - return FuncStartAddrMap[Offset]; + auto I = FuncStartAddrMap.find(Offset); + if (I == FuncStartAddrMap.end()) + return StringRef(); + return I->second; } uint32_t getFuncSizeForContext(SampleContext &Context) { return FuncSizeTracker.getFuncSizeForContext(Context); } + const SampleContextFrameVector &getFrameLocationStack(uint64_t Offset) const { + auto I = Offset2LocStackMap.find(Offset); + assert(I != Offset2LocStackMap.end() && + "Can't find location for offset in the binary"); + return I->second; + } + Optional getInlineLeafFrameLoc(uint64_t Offset) { const auto &Stack = getFrameLocationStack(Offset); if (Stack.empty()) diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp index f4e063f..b7cad8f 100644 --- a/llvm/tools/llvm-profgen/llvm-profgen.cpp +++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp @@ -93,14 +93,9 @@ int main(int argc, const char *argv[]) { if (SkipSymbolization) return EXIT_SUCCESS; - // TBD - if (Reader->getPerfScriptType() == PERF_LBR) { - WithColor::warning() << "Currently LBR only perf script is not supported!"; - return EXIT_SUCCESS; - } - - std::unique_ptr Generator = ProfileGenerator::create( - Binary.get(), Reader->getSampleCounters(), Reader->getPerfScriptType()); + std::unique_ptr Generator = + ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(), + Reader->getPerfScriptType()); Generator->generateProfile(); Generator->write();