From 75c069584a3f97122c0defb292272048af5a0c2f Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 3 Jan 2023 17:38:52 -0800 Subject: [PATCH] [BOLT][Docs] Add Sphinx documentation Add stub Sphinx documentation, with configuration copy-pasted from lld and index page converted from bolt/README.md. Reviewed By: #bolt, rafauler Differential Revision: https://reviews.llvm.org/D140156 --- bolt/docs/CMakeLists.txt | 3 - bolt/docs/README.txt | 13 +++ bolt/docs/_static/favicon.ico | Bin 0 -> 1150 bytes bolt/docs/conf.py | 246 ++++++++++++++++++++++++++++++++++++++++ bolt/docs/index.rst | 257 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 516 insertions(+), 3 deletions(-) create mode 100644 bolt/docs/README.txt create mode 100644 bolt/docs/_static/favicon.ico create mode 100644 bolt/docs/conf.py create mode 100644 bolt/docs/index.rst diff --git a/bolt/docs/CMakeLists.txt b/bolt/docs/CMakeLists.txt index babbb0f..b230512 100644 --- a/bolt/docs/CMakeLists.txt +++ b/bolt/docs/CMakeLists.txt @@ -97,9 +97,6 @@ if (LLVM_ENABLE_SPHINX) if (${SPHINX_OUTPUT_HTML}) add_sphinx_target(html bolt) endif() - if (${SPHINX_OUTPUT_MAN}) - add_sphinx_target(man bolt) - endif() endif() endif() diff --git a/bolt/docs/README.txt b/bolt/docs/README.txt new file mode 100644 index 0000000..b807dd5 --- /dev/null +++ b/bolt/docs/README.txt @@ -0,0 +1,13 @@ +BOLT Documentation +==================== + +The BOLT documentation is written using the Sphinx documentation generator. It +is currently tested with Sphinx 1.1.3. + +To build the documents into html configure BOLT with the following cmake options: + + * -DLLVM_ENABLE_SPHINX=ON + * -DBOLT_INCLUDE_DOCS=ON + +After configuring BOLT with these options the make rule `docs-bolt-html` should +be available. diff --git a/bolt/docs/_static/favicon.ico b/bolt/docs/_static/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..724ad6e12dd406cd595568426b88aedfd46365f9 GIT binary patch literal 1150 zcmZQzU<5(|0R|wcz>vYhz#zuJz@P!dKp~(AL>x#lFaYJy!TNEAIg9K{NV?ggU<}0-qQQ`@9I5#a5w7W zg)`m94(*%s?CBHjj~_pXzI*f9osj;&fB!Il`t(u%&!0c+pFe++KX!Qk^7*qS|J}Z6 z&8rvBo@C?G|L*NuS&-X){P@8-d-~MMGpA1aZ(g^0a%OVOx8;lG{NK54!|`w5zHxr} z@|g!{K8ioSfBVKZxvyu=^QTWF!~8r>>Z>XI4fplkMELCAKvr-^ZPf? z*^@`N-Mo6aC??eZueYQ1&0jx%GQ<4S(O5gdR8#qXfUCoQM`PXpQT`r3zJL2FcjfZA z!kZUQy#TtG|LNlgHE-X%vgm7XK5M3_^mopTDfMq&zZL@dB_%rafR(P=|Cj);|6!g^ z|L0DfxCvzTl}i_9e0cvZ1;oC8>-s{VnWEb^tZfI`;c9LA?aayJHXyh8yV~Ef)>HrQ zZe{#GKPB$l`O_!bK>mC4`jzLcYgc9iao>xlkJCZs?BBJ$GQ8MA%AW3HU4kgxIPM| z4rKQC@83j!{rbfT6T?P-2Bwc`y&Wfl+#G*hzIfgRrUs-RgkgMKG)N6FP0M}z_EnM? z^|;KTo;U;0%@F(tNdL13VtEFJ3+xOG2QM=)G#4{4G+hMJmJAI2AA#zABGiNQ0RZmu B;|2f# literal 0 HcmV?d00001 diff --git a/bolt/docs/conf.py b/bolt/docs/conf.py new file mode 100644 index 0000000..6f1f9ee --- /dev/null +++ b/bolt/docs/conf.py @@ -0,0 +1,246 @@ +# -*- coding: utf-8 -*- +# +# BOLT documentation build configuration file. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os +from datetime import date + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.intersphinx', 'sphinx.ext.todo'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'BOLT' +copyright = u'2015-%d, BOLT team' % date.today().year + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +today_fmt = '%Y-%m-%d' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +show_authors = True + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'friendly' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'haiku' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = ["."] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# If given, this must be the name of an image file (path relative to the +# configuration directory) that is the favicon of the docs. Modern browsers use +# this as icon for tabs, windows and bookmarks. It should be a Windows-style +# icon file (.ico), which is 16x16 or 32x32 pixels large. Default: None. The +# image file will be copied to the _static directory of the output HTML, but +# only if the file does not already exist there. +html_favicon = '_static/favicon.ico' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +html_last_updated_fmt = '%Y-%m-%d' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +html_sidebars = {'index': ['indexsidebar.html']} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# html_additional_pages = {'index': 'index.html'} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'boltdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('contents', 'bolt.tex', u'BOLT Documentation', + u'LLVM project', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('contents', 'bolt', u'BOLT Documentation', + [u'LLVM project'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('contents', 'BOLT', u'BOLT Documentation', + u'LLVM project', 'BOLT', 'Binary Optimization and Layout Tool', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + + +# FIXME: Define intersphinx configuration. +intersphinx_mapping = {} + + +# -- Options for extensions ---------------------------------------------------- + +# Enable this if you want TODOs to show up in the generated documentation. +todo_include_todos = True diff --git a/bolt/docs/index.rst b/bolt/docs/index.rst new file mode 100644 index 0000000..fce6d25 --- /dev/null +++ b/bolt/docs/index.rst @@ -0,0 +1,257 @@ +BOLT +==== + +BOLT is a post-link optimizer developed to speed up large applications. +It achieves the improvements by optimizing application’s code layout +based on execution profile gathered by sampling profiler, such as Linux +``perf`` tool. An overview of the ideas implemented in BOLT along with a +discussion of its potential and current results is available in `CGO’19 +paper `__. + +Input Binary Requirements +------------------------- + +BOLT operates on X86-64 and AArch64 ELF binaries. At the minimum, the +binaries should have an unstripped symbol table, and, to get maximum +performance gains, they should be linked with relocations +(``--emit-relocs`` or ``-q`` linker flag). + +BOLT disassembles functions and reconstructs the control flow graph +(CFG) before it runs optimizations. Since this is a nontrivial task, +especially when indirect branches are present, we rely on certain +heuristics to accomplish it. These heuristics have been tested on a code +generated with Clang and GCC compilers. The main requirement for C/C++ +code is not to rely on code layout properties, such as function pointer +deltas. Assembly code can be processed too. Requirements for it include +a clear separation of code and data, with data objects being placed into +data sections/segments. If indirect jumps are used for intra-function +control transfer (e.g., jump tables), the code patterns should be +matching those generated by Clang/GCC. + +NOTE: BOLT is currently incompatible with the +``-freorder-blocks-and-partition`` compiler option. Since GCC8 enables +this option by default, you have to explicitly disable it by adding +``-fno-reorder-blocks-and-partition`` flag if you are compiling with +GCC8 or above. + +NOTE2: DWARF v5 is the new debugging format generated by the latest LLVM +and GCC compilers. It offers several benefits over the previous DWARF +v4. Currently, the support for v5 is a work in progress for BOLT. While +you will be able to optimize binaries produced by the latest compilers, +until the support is complete, you will not be able to update the debug +info with ``-update-debug-sections``. To temporarily work around the +issue, we recommend compiling binaries with ``-gdwarf-4`` option that +forces DWARF v4 output. + +PIE and .so support has been added recently. Please report bugs if you +encounter any issues. + +Installation +------------ + +Docker Image +~~~~~~~~~~~~ + +You can build and use the docker image containing BOLT using our `docker +file `__. Alternatively, you can build BOLT +manually using the steps below. + +Manual Build +~~~~~~~~~~~~ + +BOLT heavily uses LLVM libraries, and by design, it is built as one of +LLVM tools. The build process is not much different from a regular LLVM +build. The following instructions are assuming that you are running +under Linux. + +Start with cloning LLVM repo: + +:: + + > git clone https://github.com/llvm/llvm-project.git + > mkdir build + > cd build + > cmake -G Ninja ../llvm-project/llvm -DLLVM_TARGETS_TO_BUILD="X86;AArch64" -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_ENABLE_PROJECTS="bolt" + > ninja bolt + +``llvm-bolt`` will be available under ``bin/``. Add this directory to +your path to ensure the rest of the commands in this tutorial work. + +Optimizing BOLT’s Performance +----------------------------- + +BOLT runs many internal passes in parallel. If you foresee heavy usage +of BOLT, you can improve the processing time by linking against one of +memory allocation libraries with good support for concurrency. E.g. to +use jemalloc: + +:: + + > sudo yum install jemalloc-devel + > LD_PRELOAD=/usr/lib64/libjemalloc.so llvm-bolt .... + +Or if you rather use tcmalloc: + +:: + + > sudo yum install gperftools-devel + > LD_PRELOAD=/usr/lib64/libtcmalloc_minimal.so llvm-bolt .... + +Usage +----- + +For a complete practical guide of using BOLT see `Optimizing Clang with +BOLT `__. + +Step 0 +~~~~~~ + +In order to allow BOLT to re-arrange functions (in addition to +re-arranging code within functions) in your program, it needs a little +help from the linker. Add ``--emit-relocs`` to the final link step of +your application. You can verify the presence of relocations by checking +for ``.rela.text`` section in the binary. BOLT will also report if it +detects relocations while processing the binary. + +Step 1: Collect Profile +~~~~~~~~~~~~~~~~~~~~~~~ + +This step is different for different kinds of executables. If you can +invoke your program to run on a representative input from a command +line, then check **For Applications** section below. If your program +typically runs as a server/service, then skip to **For Services** +section. + +The version of ``perf`` command used for the following steps has to +support ``-F brstack`` option. We recommend using ``perf`` version 4.5 +or later. + +For Applications +^^^^^^^^^^^^^^^^ + +This assumes you can run your program from a command line with a typical +input. In this case, simply prepend the command line invocation with +``perf``: + +:: + + $ perf record -e cycles:u -j any,u -o perf.data -- ... + +For Services +^^^^^^^^^^^^ + +Once you get the service deployed and warmed-up, it is time to collect +perf data with LBR (branch information). The exact perf command to use +will depend on the service. E.g., to collect the data for all processes +running on the server for the next 3 minutes use: + +:: + + $ perf record -e cycles:u -j any,u -a -o perf.data -- sleep 180 + +Depending on the application, you may need more samples to be included +with your profile. It’s hard to tell upfront what would be a sweet spot +for your application. We recommend the profile to cover 1B instructions +as reported by BOLT ``-dyno-stats`` option. If you need to increase the +number of samples in the profile, you can either run the ``sleep`` +command for longer and use ``-F`` option with ``perf`` to increase +sampling frequency. + +Note that for profile collection we recommend using cycle events and not +``BR_INST_RETIRED.*``. Empirically we found it to produce better +results. + +If the collection of a profile with branches is not available, e.g., +when you run on a VM or on hardware that does not support it, then you +can use only sample events, such as cycles. In this case, the quality of +the profile information would not be as good, and performance gains with +BOLT are expected to be lower. + +With instrumentation +^^^^^^^^^^^^^^^^^^^^ + +If perf record is not available to you, you may collect profile by first +instrumenting the binary with BOLT and then running it. + +:: + + llvm-bolt -instrument -o + +After you run instrumented-executable with the desired workload, its +BOLT profile should be ready for you in ``/tmp/prof.fdata`` and you can +skip **Step 2**. + +Run BOLT with the ``-help`` option and check the category “BOLT +instrumentation options” for a quick reference on instrumentation knobs. + +Step 2: Convert Profile to BOLT Format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +NOTE: you can skip this step and feed ``perf.data`` directly to BOLT +using experimental ``-p perf.data`` option. + +For this step, you will need ``perf.data`` file collected from the +previous step and a copy of the binary that was running. The binary has +to be either unstripped, or should have a symbol table intact (i.e., +running ``strip -g`` is okay). + +Make sure ``perf`` is in your ``PATH``, and execute ``perf2bolt``: + +:: + + $ perf2bolt -p perf.data -o perf.fdata + +This command will aggregate branch data from ``perf.data`` and store it +in a format that is both more compact and more resilient to binary +modifications. + +If the profile was collected without LBRs, you will need to add ``-nl`` +flag to the command line above. + +Step 3: Optimize with BOLT +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once you have ``perf.fdata`` ready, you can use it for optimizations +with BOLT. Assuming your environment is setup to include the right path, +execute ``llvm-bolt``: + +:: + + $ llvm-bolt -o .bolt -data=perf.fdata -reorder-blocks=ext-tsp -reorder-functions=hfsort -split-functions -split-all-cold -split-eh -dyno-stats + +If you do need an updated debug info, then add +``-update-debug-sections`` option to the command above. The processing +time will be slightly longer. + +For a full list of options see ``-help``/``-help-hidden`` output. + +The input binary for this step does not have to 100% match the binary +used for profile collection in **Step 1**. This could happen when you +are doing active development, and the source code constantly changes, +yet you want to benefit from profile-guided optimizations. However, +since the binary is not precisely the same, the profile information +could become invalid or stale, and BOLT will report the number of +functions with a stale profile. The higher the number, the less +performance improvement should be expected. Thus, it is crucial to +update ``.fdata`` for release branches. + +Multiple Profiles +----------------- + +Suppose your application can run in different modes, and you can +generate multiple profiles for each one of them. To generate a single +binary that can benefit all modes (assuming the profiles don’t +contradict each other) you can use ``merge-fdata`` tool: + +:: + + $ merge-fdata *.fdata > combined.fdata + +Use ``combined.fdata`` for **Step 3** above to generate a universally +optimized binary. + +License +------- + +BOLT is licensed under the `Apache License v2.0 with LLVM +Exceptions <./LICENSE.TXT>`__. -- 2.7.4