Merge tag 'clang-lto-v5.12-rc1-part2' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 23 Feb 2021 23:13:45 +0000 (15:13 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 23 Feb 2021 23:13:45 +0000 (15:13 -0800)
Pull more clang LTO updates from Kees Cook:
 "Clang LTO x86 enablement.

  Full disclosure: while this has _not_ been in linux-next (since it
  initially looked like the objtool dependencies weren't going to make
  v5.12), it has been under daily build and runtime testing by Sami for
  quite some time. These x86 portions have been discussed on lkml, with
  Peter, Josh, and others helping nail things down.

  The bulk of the changes are to get objtool working happily. The rest
  of the x86 enablement is very small.

  Summary:

   - Generate __mcount_loc in objtool (Peter Zijlstra)

   - Support running objtool against vmlinux.o (Sami Tolvanen)

   - Clang LTO enablement for x86 (Sami Tolvanen)"

Link: https://lore.kernel.org/lkml/20201013003203.4168817-26-samitolvanen@google.com/
Link: https://lore.kernel.org/lkml/cover.1611263461.git.jpoimboe@redhat.com/
* tag 'clang-lto-v5.12-rc1-part2' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  kbuild: lto: force rebuilds when switching CONFIG_LTO
  x86, build: allow LTO to be selected
  x86, cpu: disable LTO for cpu.c
  x86, vdso: disable LTO only for vDSO
  kbuild: lto: postpone objtool
  objtool: Split noinstr validation from --vmlinux
  x86, build: use objtool mcount
  tracing: add support for objtool mcount
  objtool: Don't autodetect vmlinux.o
  objtool: Fix __mcount_loc generation with Clang's assembler
  objtool: Add a pass for generating __mcount_loc

12 files changed:
1  2 
Makefile
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/power/Makefile
kernel/trace/Kconfig
scripts/Makefile.lib
tools/objtool/builtin-check.c
tools/objtool/check.c
tools/objtool/include/objtool/builtin.h
tools/objtool/include/objtool/check.h
tools/objtool/include/objtool/objtool.h
tools/objtool/objtool.c

diff --combined Makefile
+++ b/Makefile
@@@ -2,8 -2,8 +2,8 @@@
  VERSION = 5
  PATCHLEVEL = 11
  SUBLEVEL = 0
 -EXTRAVERSION = -rc2
 -NAME = Kleptomaniac Octopus
 +EXTRAVERSION =
 +NAME = 💕 Valentine's Day Edition 💕
  
  # *DOCUMENTATION*
  # To see a list of typical targets execute "make help"
@@@ -452,6 -452,7 +452,6 @@@ AWK                = aw
  INSTALLKERNEL  := installkernel
  DEPMOD                = depmod
  PERL          = perl
 -PYTHON                = python
  PYTHON3               = python3
  CHECK         = sparse
  BASH          = bash
@@@ -507,7 -508,7 +507,7 @@@ CLANG_FLAGS :
  
  export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC
  export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
 -export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
 +export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
  export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
  export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
  
@@@ -648,8 -649,7 +648,8 @@@ ifeq ($(KBUILD_EXTMOD),
  core-y                := init/ usr/
  drivers-y     := drivers/ sound/
  drivers-$(CONFIG_SAMPLES) += samples/
 -drivers-y     += net/ virt/
 +drivers-$(CONFIG_NET) += net/
 +drivers-y     += virt/
  libs-y                := lib/
  endif # KBUILD_EXTMOD
  
@@@ -812,12 -812,10 +812,12 @@@ KBUILD_CFLAGS   += -ftrivial-auto-var-ini
  KBUILD_CFLAGS += -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang
  endif
  
 +DEBUG_CFLAGS  :=
 +
  # Workaround for GCC versions < 5.0
  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61801
  ifdef CONFIG_CC_IS_GCC
 -DEBUG_CFLAGS  := $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments))
 +DEBUG_CFLAGS  += $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments))
  endif
  
  ifdef CONFIG_DEBUG_INFO
@@@ -862,6 -860,9 +862,9 @@@ ifdef CONFIG_FTRACE_MCOUNT_USE_C
      endif
    endif
  endif
+ ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL
+   CC_FLAGS_USING      += -DCC_USING_NOP_MCOUNT
+ endif
  ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
    ifdef CONFIG_HAVE_C_RECORDMCOUNT
      BUILD_C_RECORDMCOUNT := y
@@@ -909,7 -910,8 +912,8 @@@ KBUILD_LDFLAGS += -mllvm -import-instr-
  endif
  
  ifdef CONFIG_LTO
- KBUILD_CFLAGS += $(CC_FLAGS_LTO)
+ KBUILD_CFLAGS += -fno-lto $(CC_FLAGS_LTO)
+ KBUILD_AFLAGS += -fno-lto
  export CC_FLAGS_LTO
  endif
  
@@@ -964,6 -966,12 +968,6 @@@ KBUILD_CFLAGS   += $(call cc-option,-We
  # change __FILE__ to the relative path from the srctree
  KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
  
 -# ensure -fcf-protection is disabled when using retpoline as it is
 -# incompatible with -mindirect-branch=thunk-extern
 -ifdef CONFIG_RETPOLINE
 -KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
 -endif
 -
  # include additional Makefiles when needed
  include-y                     := scripts/Makefile.extrawarn
  include-$(CONFIG_KASAN)               += scripts/Makefile.kasan
@@@ -1096,17 -1104,6 +1100,17 @@@ ifdef CONFIG_STACK_VALIDATIO
    endif
  endif
  
 +PHONY += resolve_btfids_clean
 +
 +resolve_btfids_O = $(abspath $(objtree))/tools/bpf/resolve_btfids
 +
 +# tools/bpf/resolve_btfids directory might not exist
 +# in output directory, skip its clean in that case
 +resolve_btfids_clean:
 +ifneq ($(wildcard $(resolve_btfids_O)),)
 +      $(Q)$(MAKE) -sC $(srctree)/tools/bpf/resolve_btfids O=$(resolve_btfids_O) clean
 +endif
 +
  ifdef CONFIG_BPF
  ifdef CONFIG_DEBUG_INFO_BTF
    ifeq ($(has_libelf),1)
@@@ -1243,6 -1240,10 +1247,10 @@@ uapi-asm-generic
  PHONY += prepare-objtool prepare-resolve_btfids
  prepare-objtool: $(objtool_target)
  ifeq ($(SKIP_STACK_VALIDATION),1)
+ ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL
+       @echo "error: Cannot generate __mcount_loc for CONFIG_DYNAMIC_FTRACE=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2
+       @false
+ endif
  ifdef CONFIG_UNWINDER_ORC
        @echo "error: Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2
        @false
@@@ -1358,9 -1359,6 +1366,9 @@@ ifneq ($(dtstree),
  %.dtb: include/config/kernel.release scripts_dtc
        $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@
  
 +%.dtbo: include/config/kernel.release scripts_dtc
 +      $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@
 +
  PHONY += dtbs dtbs_install dtbs_check
  dtbs: include/config/kernel.release scripts_dtc
        $(Q)$(MAKE) $(build)=$(dtstree)
@@@ -1519,7 -1517,7 +1527,7 @@@ vmlinuxclean
        $(Q)$(CONFIG_SHELL) $(srctree)/scripts/link-vmlinux.sh clean
        $(Q)$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) clean)
  
 -clean: archclean vmlinuxclean
 +clean: archclean vmlinuxclean resolve_btfids_clean
  
  # mrproper - Delete all generated files, including .config
  #
@@@ -1840,7 -1838,7 +1848,7 @@@ clean: $(clean-dirs
        @find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \
                \( -name '*.[aios]' -o -name '*.ko' -o -name '.*.cmd' \
                -o -name '*.ko.*' \
 -              -o -name '*.dtb' -o -name '*.dtb.S' -o -name '*.dt.yaml' \
 +              -o -name '*.dtb' -o -name '*.dtbo' -o -name '*.dtb.S' -o -name '*.dt.yaml' \
                -o -name '*.dwo' -o -name '*.lst' \
                -o -name '*.su' -o -name '*.mod' \
                -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
diff --combined arch/x86/Kconfig
@@@ -19,7 -19,6 +19,7 @@@ config X86_3
        select KMAP_LOCAL
        select MODULES_USE_ELF_REL
        select OLD_SIGACTION
 +      select ARCH_SPLIT_ARG64
  
  config X86_64
        def_bool y
@@@ -32,7 -31,6 +32,7 @@@
        select MODULES_USE_ELF_RELA
        select NEED_DMA_MAP_STATE
        select SWIOTLB
 +      select ARCH_HAS_ELFCORE_COMPAT
  
  config FORCE_DYNAMIC_FTRACE
        def_bool y
@@@ -97,6 -95,8 +97,8 @@@ config X8
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC
        select ARCH_SUPPORTS_NUMA_BALANCING     if X86_64
        select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP       if NR_CPUS <= 4096
+       select ARCH_SUPPORTS_LTO_CLANG          if X86_64
+       select ARCH_SUPPORTS_LTO_CLANG_THIN     if X86_64
        select ARCH_USE_BUILTIN_BSWAP
        select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_USE_QUEUED_SPINLOCKS
        select HAVE_CONTEXT_TRACKING            if X86_64
        select HAVE_CONTEXT_TRACKING_OFFSTACK   if HAVE_CONTEXT_TRACKING
        select HAVE_C_RECORDMCOUNT
+       select HAVE_OBJTOOL_MCOUNT              if STACK_VALIDATION
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DMA_CONTIGUOUS
        select HAVE_DYNAMIC_FTRACE
        select HAVE_MOVE_PMD
        select HAVE_MOVE_PUD
        select HAVE_NMI
 -      select HAVE_OPROFILE
        select HAVE_OPTPROBES
        select HAVE_PCSPKR_PLATFORM
        select HAVE_PERF_EVENTS
        select HAVE_STACK_VALIDATION            if X86_64
        select HAVE_STATIC_CALL
        select HAVE_STATIC_CALL_INLINE          if HAVE_STACK_VALIDATION
 +      select HAVE_PREEMPT_DYNAMIC
        select HAVE_RSEQ
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_UNSTABLE_SCHED_CLOCK
@@@ -891,7 -892,7 +894,7 @@@ config HPET_TIME
  
  config HPET_EMULATE_RTC
        def_bool y
 -      depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
 +      depends on HPET_TIMER && (RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
  
  config APB_TIMER
        def_bool y if X86_INTEL_MID
@@@ -1159,6 -1160,10 +1162,6 @@@ config X86_MCE_INJEC
          If you don't know what a machine check is and you don't do kernel
          QA it is safe to say n.
  
 -config X86_THERMAL_VECTOR
 -      def_bool y
 -      depends on X86_MCE_INTEL
 -
  source "arch/x86/events/Kconfig"
  
  config X86_LEGACY_VM86
@@@ -2861,6 -2866,7 +2864,6 @@@ config IA32_EMULATIO
        depends on X86_64
        select ARCH_WANT_OLD_COMPAT_IPC
        select BINFMT_ELF
 -      select COMPAT_BINFMT_ELF
        select COMPAT_OLD_SIGACTION
        help
          Include code to run legacy 32-bit programs under a
diff --combined arch/x86/Makefile
@@@ -50,9 -50,6 +50,9 @@@ export BIT
  KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
  KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
  
 +# Intel CET isn't enabled in the kernel
 +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
 +
  ifeq ($(CONFIG_X86_32),y)
          BITS := 32
          UTS_MACHINE := i386
@@@ -169,6 -166,11 +169,11 @@@ ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1
        KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
  endif
  
+ ifdef CONFIG_LTO_CLANG
+ KBUILD_LDFLAGS        += -plugin-opt=-code-model=kernel \
+                  -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+ endif
  # Workaround for a gcc prelease that unfortunately was shipped in a suse release
  KBUILD_CFLAGS += -Wno-sign-compare
  #
@@@ -232,6 -234,9 +237,6 @@@ core-y += arch/x86
  drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
  drivers-$(CONFIG_PCI)            += arch/x86/pci/
  
 -# must be linked after kernel/
 -drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
 -
  # suspend and hibernation support
  drivers-$(CONFIG_PM) += arch/x86/power/
  
@@@ -292,20 -297,16 +297,20 @@@ archclean
        $(Q)$(MAKE) $(clean)=arch/x86/tools
  
  define archhelp
 -  echo  '* bzImage      - Compressed kernel image (arch/x86/boot/bzImage)'
 -  echo  '  install      - Install kernel using'
 -  echo  '                  (your) ~/bin/$(INSTALLKERNEL) or'
 -  echo  '                  (distribution) /sbin/$(INSTALLKERNEL) or'
 -  echo  '                  install to $$(INSTALL_PATH) and run lilo'
 -  echo  '  fdimage      - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
 -  echo  '  fdimage144   - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
 -  echo  '  fdimage288   - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)'
 -  echo  '  isoimage     - Create a boot CD-ROM image (arch/x86/boot/image.iso)'
 -  echo  '                  bzdisk/fdimage*/isoimage also accept:'
 -  echo  '                  FDARGS="..."  arguments for the booted kernel'
 -  echo  '                  FDINITRD=file initrd for the booted kernel'
 +  echo  '* bzImage            - Compressed kernel image (arch/x86/boot/bzImage)'
 +  echo  '  install            - Install kernel using (your) ~/bin/$(INSTALLKERNEL) or'
 +  echo  '                       (distribution) /sbin/$(INSTALLKERNEL) or install to '
 +  echo  '                       $$(INSTALL_PATH) and run lilo'
 +  echo  ''
 +  echo  '  fdimage            - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
 +  echo  '  fdimage144         - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
 +  echo  '  fdimage288         - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)'
 +  echo  '  isoimage           - Create a boot CD-ROM image (arch/x86/boot/image.iso)'
 +  echo  '                       bzdisk/fdimage*/isoimage also accept:'
 +  echo  '                       FDARGS="..."  arguments for the booted kernel'
 +  echo  '                       FDINITRD=file initrd for the booted kernel'
 +  echo  ''
 +  echo  '  kvm_guest.config   - Enable Kconfig items for running this kernel as a KVM guest'
 +  echo  '  xen.config         - Enable Kconfig items for running this kernel as a Xen guest'
 +
  endef
diff --combined arch/x86/power/Makefile
@@@ -1,8 -1,13 +1,12 @@@
  # SPDX-License-Identifier: GPL-2.0
 -OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y
  
  # __restore_processor_state() restores %gs after S3 resume and so should not
  # itself be stack-protected
  CFLAGS_cpu.o  := -fno-stack-protector
  
+ # Clang may incorrectly inline functions with stack protector enabled into
+ # __restore_processor_state(): https://bugs.llvm.org/show_bug.cgi?id=47479
+ CFLAGS_REMOVE_cpu.o := $(CC_FLAGS_LTO)
  obj-$(CONFIG_PM_SLEEP)                += cpu.o
  obj-$(CONFIG_HIBERNATION)     += hibernate_$(BITS).o hibernate_asm_$(BITS).o hibernate.o
diff --combined kernel/trace/Kconfig
@@@ -60,6 -60,11 +60,11 @@@ config HAVE_NOP_MCOUN
        help
          Arch supports the gcc options -pg with -mrecord-mcount and -nop-mcount
  
+ config HAVE_OBJTOOL_MCOUNT
+       bool
+       help
+         Arch supports objtool --mcount
  config HAVE_C_RECORDMCOUNT
        bool
        help
@@@ -538,14 -543,14 +543,14 @@@ config KPROBE_EVENT
  config KPROBE_EVENTS_ON_NOTRACE
        bool "Do NOT protect notrace function from kprobe events"
        depends on KPROBE_EVENTS
 -      depends on KPROBES_ON_FTRACE
 +      depends on DYNAMIC_FTRACE
        default n
        help
          This is only for the developers who want to debug ftrace itself
          using kprobe events.
  
          If kprobes can use ftrace instead of breakpoint, ftrace related
 -        functions are protected from kprobe-events to prevent an infinit
 +        functions are protected from kprobe-events to prevent an infinite
          recursion or any unexpected execution path which leads to a kernel
          crash.
  
@@@ -612,10 -617,18 +617,18 @@@ config FTRACE_MCOUNT_USE_C
        depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
        depends on FTRACE_MCOUNT_RECORD
  
+ config FTRACE_MCOUNT_USE_OBJTOOL
+       def_bool y
+       depends on HAVE_OBJTOOL_MCOUNT
+       depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
+       depends on !FTRACE_MCOUNT_USE_CC
+       depends on FTRACE_MCOUNT_RECORD
  config FTRACE_MCOUNT_USE_RECORDMCOUNT
        def_bool y
        depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
        depends on !FTRACE_MCOUNT_USE_CC
+       depends on !FTRACE_MCOUNT_USE_OBJTOOL
        depends on FTRACE_MCOUNT_RECORD
  
  config TRACING_MAP
@@@ -902,10 -915,6 +915,10 @@@ config PREEMPTIRQ_DELAY_TES
          irq-disabled critical sections for 500us:
          modprobe preemptirq_delay_test test_mode=irq delay=500 burst_size=3
  
 +        What's more, if you want to attach the test on the cpu which the latency
 +        tracer is running on, specify cpu_affinity=cpu_num at the end of the
 +        command.
 +
          If unsure, say N
  
  config SYNTH_EVENT_GEN_TEST
diff --combined scripts/Makefile.lib
@@@ -86,9 -86,7 +86,9 @@@ extra-$(CONFIG_OF_ALL_DTBS)   += $(dtb-
  
  ifneq ($(CHECK_DTBS),)
  extra-y += $(patsubst %.dtb,%.dt.yaml, $(dtb-y))
 +extra-y += $(patsubst %.dtbo,%.dt.yaml, $(dtb-y))
  extra-$(CONFIG_OF_ALL_DTBS) += $(patsubst %.dtb,%.dt.yaml, $(dtb-))
 +extra-$(CONFIG_OF_ALL_DTBS) += $(patsubst %.dtbo,%.dt.yaml, $(dtb-))
  endif
  
  # Add subdir path
@@@ -222,6 -220,18 +222,18 @@@ dtc_cpp_flags  = -Wp,-MMD,$(depfile).pr
                 $(addprefix -I,$(DTC_INCLUDE))                          \
                 -undef -D__DTS__
  
+ # Objtool arguments are also needed for modfinal with LTO, so we define
 + # them here to avoid duplication.
+ objtool_args =                                                                \
+       $(if $(CONFIG_UNWINDER_ORC),orc generate,check)                 \
+       $(if $(part-of-module), --module,)                              \
+       $(if $(CONFIG_FRAME_POINTER),, --no-fp)                         \
+       $(if $(or $(CONFIG_GCOV_KERNEL),$(CONFIG_LTO_CLANG)),           \
+               --no-unreachable,)                                      \
+       $(if $(CONFIG_RETPOLINE), --retpoline,)                         \
+       $(if $(CONFIG_X86_SMAP), --uaccess,)                            \
+       $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount,)
  # Useful for describing the dependency of composite objects
  # Usage:
  #   $(call multi_depend, multi_used_targets, suffix_to_remove, suffix_to_add)
@@@ -331,9 -341,6 +343,9 @@@ cmd_dtc = $(HOSTCC) -E $(dtc_cpp_flags
  $(obj)/%.dtb: $(src)/%.dts $(DTC) FORCE
        $(call if_changed_dep,dtc)
  
 +$(obj)/%.dtbo: $(src)/%.dts $(DTC) FORCE
 +      $(call if_changed_dep,dtc)
 +
  DT_CHECKER ?= dt-validate
  DT_BINDING_DIR := Documentation/devicetree/bindings
  # DT_TMP_SCHEMA may be overridden from Documentation/devicetree/bindings/Makefile
  
  #include <subcmd/parse-options.h>
  #include <string.h>
 -#include "builtin.h"
 -#include "objtool.h"
 +#include <objtool/builtin.h>
 +#include <objtool/objtool.h>
  
- bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
+ bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
  
  static const char * const check_usage[] = {
        "objtool check [<options>] file.o",
@@@ -34,13 -34,15 +34,15 @@@ const struct option check_options[] = 
        OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
        OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
        OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"),
+       OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"),
        OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
+       OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
        OPT_END(),
  };
  
  int cmd_check(int argc, const char **argv)
  {
-       const char *objname, *s;
+       const char *objname;
        struct objtool_file *file;
        int ret;
  
  
        objname = argv[0];
  
-       s = strstr(objname, "vmlinux.o");
-       if (s && !s[9])
-               vmlinux = true;
        file = objtool_open_read(objname);
        if (!file)
                return 1;
diff --combined tools/objtool/check.c
@@@ -6,20 -6,21 +6,20 @@@
  #include <string.h>
  #include <stdlib.h>
  
 -#include "builtin.h"
 -#include "cfi.h"
 -#include "arch.h"
 -#include "check.h"
 -#include "special.h"
 -#include "warn.h"
 -#include "arch_elf.h"
 +#include <arch/elf.h>
 +#include <objtool/builtin.h>
 +#include <objtool/cfi.h>
 +#include <objtool/arch.h>
 +#include <objtool/check.h>
 +#include <objtool/special.h>
 +#include <objtool/warn.h>
 +#include <objtool/endianness.h>
  
  #include <linux/objtool.h>
  #include <linux/hashtable.h>
  #include <linux/kernel.h>
  #include <linux/static_call_types.h>
  
 -#define FAKE_JUMP_OFFSET -1
 -
  struct alternative {
        struct list_head list;
        struct instruction *insn;
@@@ -110,20 -111,15 +110,20 @@@ static struct instruction *prev_insn_sa
  
  static bool is_sibling_call(struct instruction *insn)
  {
 +      /*
 +       * Assume only ELF functions can make sibling calls.  This ensures
 +       * sibling call detection consistency between vmlinux.o and individual
 +       * objects.
 +       */
 +      if (!insn->func)
 +              return false;
 +
        /* An indirect jump is either a sibling call or a jump to a table. */
        if (insn->type == INSN_JUMP_DYNAMIC)
                return list_empty(&insn->alts);
  
 -      if (!is_static_jump(insn))
 -              return false;
 -
        /* add_jump_destinations() sets insn->call_dest for sibling calls. */
 -      return !!insn->call_dest;
 +      return (is_static_jump(insn) && insn->call_dest);
  }
  
  /*
@@@ -160,7 -156,6 +160,7 @@@ static bool __dead_end_function(struct 
                "machine_real_restart",
                "rewind_stack_do_exit",
                "kunit_try_catch_throw",
 +              "xen_start_kernel",
        };
  
        if (!func)
@@@ -249,7 -244,7 +249,7 @@@ static void init_insn_state(struct insn
         * not correctly determine insn->call_dest->sec (external symbols do
         * not have a section).
         */
-       if (vmlinux && sec)
+       if (vmlinux && noinstr && sec)
                state->noinstr = sec->noinstr;
  }
  
@@@ -507,21 -502,8 +507,21 @@@ static int create_static_call_sections(
  
                key_sym = find_symbol_by_name(file->elf, tmp);
                if (!key_sym) {
 -                      WARN("static_call: can't find static_call_key symbol: %s", tmp);
 -                      return -1;
 +                      if (!module) {
 +                              WARN("static_call: can't find static_call_key symbol: %s", tmp);
 +                              return -1;
 +                      }
 +
 +                      /*
 +                       * For modules(), the key might not be exported, which
 +                       * means the module can make static calls but isn't
 +                       * allowed to change them.
 +                       *
 +                       * In that case we temporarily set the key to be the
 +                       * trampoline address.  This is fixed up in
 +                       * static_call_add_module().
 +                       */
 +                      key_sym = insn->call_dest;
                }
                free(key_name);
  
        return 0;
  }
  
+ static int create_mcount_loc_sections(struct objtool_file *file)
+ {
+       struct section *sec, *reloc_sec;
+       struct reloc *reloc;
+       unsigned long *loc;
+       struct instruction *insn;
+       int idx;
+       sec = find_section_by_name(file->elf, "__mcount_loc");
+       if (sec) {
+               INIT_LIST_HEAD(&file->mcount_loc_list);
+               WARN("file already has __mcount_loc section, skipping");
+               return 0;
+       }
+       if (list_empty(&file->mcount_loc_list))
+               return 0;
+       idx = 0;
+       list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node)
+               idx++;
+       sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx);
+       if (!sec)
+               return -1;
+       reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+       if (!reloc_sec)
+               return -1;
+       idx = 0;
+       list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) {
+               loc = (unsigned long *)sec->data->d_buf + idx;
+               memset(loc, 0, sizeof(unsigned long));
+               reloc = malloc(sizeof(*reloc));
+               if (!reloc) {
+                       perror("malloc");
+                       return -1;
+               }
+               memset(reloc, 0, sizeof(*reloc));
+               if (insn->sec->sym) {
+                       reloc->sym = insn->sec->sym;
+                       reloc->addend = insn->offset;
+               } else {
+                       reloc->sym = find_symbol_containing(insn->sec, insn->offset);
+                       if (!reloc->sym) {
+                               WARN("missing symbol for insn at offset 0x%lx\n",
+                                    insn->offset);
+                               return -1;
+                       }
+                       reloc->addend = insn->offset - reloc->sym->offset;
+               }
+               reloc->type = R_X86_64_64;
+               reloc->offset = idx * sizeof(unsigned long);
+               reloc->sec = reloc_sec;
+               elf_add_reloc(file->elf, reloc);
+               idx++;
+       }
+       if (elf_rebuild_reloc_section(file->elf, reloc_sec))
+               return -1;
+       return 0;
+ }
  /*
   * Warnings shouldn't be reported for ignored functions.
   */
@@@ -792,16 -846,22 +864,16 @@@ static int add_jump_destinations(struc
                if (!is_static_jump(insn))
                        continue;
  
 -              if (insn->offset == FAKE_JUMP_OFFSET)
 -                      continue;
 -
                reloc = find_reloc_by_dest_range(file->elf, insn->sec,
 -                                             insn->offset, insn->len);
 +                                               insn->offset, insn->len);
                if (!reloc) {
                        dest_sec = insn->sec;
                        dest_off = arch_jump_destination(insn);
                } else if (reloc->sym->type == STT_SECTION) {
                        dest_sec = reloc->sym->sec;
                        dest_off = arch_dest_reloc_offset(reloc->addend);
 -              } else if (reloc->sym->sec->idx) {
 -                      dest_sec = reloc->sym->sec;
 -                      dest_off = reloc->sym->sym.st_value +
 -                                 arch_dest_reloc_offset(reloc->addend);
 -              } else if (strstr(reloc->sym->name, "_indirect_thunk_")) {
 +              } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
 +                         !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
                        /*
                         * Retpoline jumps are really dynamic jumps in
                         * disguise, so convert them accordingly.
  
                        insn->retpoline_safe = true;
                        continue;
 -              } else {
 -                      /* external sibling call */
 +              } else if (insn->func) {
 +                      /* internal or external sibling call (with reloc) */
                        insn->call_dest = reloc->sym;
                        if (insn->call_dest->static_call_tramp) {
                                list_add_tail(&insn->static_call_node,
                                              &file->static_call_list);
                        }
                        continue;
 +              } else if (reloc->sym->sec->idx) {
 +                      dest_sec = reloc->sym->sec;
 +                      dest_off = reloc->sym->sym.st_value +
 +                                 arch_dest_reloc_offset(reloc->addend);
 +              } else {
 +                      /* non-func asm code jumping to another file */
 +                      continue;
                }
  
                insn->jump_dest = find_insn(file, dest_sec, dest_off);
                         * case where the parent function's only reference to a
                         * subfunction is through a jump table.
                         */
 -                      if (!strstr(insn->func->name, ".cold.") &&
 -                          strstr(insn->jump_dest->func->name, ".cold.")) {
 +                      if (!strstr(insn->func->name, ".cold") &&
 +                          strstr(insn->jump_dest->func->name, ".cold")) {
                                insn->func->cfunc = insn->jump_dest->func;
                                insn->jump_dest->func->pfunc = insn->func;
  
                        } else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
                                   insn->jump_dest->offset == insn->jump_dest->func->offset) {
  
 -                              /* internal sibling call */
 +                              /* internal sibling call (without reloc) */
                                insn->call_dest = insn->jump_dest->func;
                                if (insn->call_dest->static_call_tramp) {
                                        list_add_tail(&insn->static_call_node,
@@@ -975,6 -1028,22 +1047,22 @@@ static int add_call_destinations(struc
                        insn->type = INSN_NOP;
                }
  
+               if (mcount && !strcmp(insn->call_dest->name, "__fentry__")) {
+                       if (reloc) {
+                               reloc->type = R_NONE;
+                               elf_write_reloc(file->elf, reloc);
+                       }
+                       elf_write_insn(file->elf, insn->sec,
+                                      insn->offset, insn->len,
+                                      arch_nop_insn(insn->len));
+                       insn->type = INSN_NOP;
+                       list_add_tail(&insn->mcount_loc_node,
+                                     &file->mcount_loc_list);
+               }
                /*
                 * Whatever stack impact regular CALLs have, should be undone
                 * by the RETURN of the called function.
  }
  
  /*
 - * The .alternatives section requires some extra special care, over and above
 - * what other special sections require:
 - *
 - * 1. Because alternatives are patched in-place, we need to insert a fake jump
 - *    instruction at the end so that validate_branch() skips all the original
 - *    replaced instructions when validating the new instruction path.
 - *
 - * 2. An added wrinkle is that the new instruction length might be zero.  In
 - *    that case the old instructions are replaced with noops.  We simulate that
 - *    by creating a fake jump as the only new instruction.
 - *
 - * 3. In some cases, the alternative section includes an instruction which
 - *    conditionally jumps to the _end_ of the entry.  We have to modify these
 - *    jumps' destinations to point back to .text rather than the end of the
 - *    entry in .altinstr_replacement.
 + * The .alternatives section requires some extra special care over and above
 + * other special sections because alternatives are patched in place.
   */
  static int handle_group_alt(struct objtool_file *file,
                            struct special_alt *special_alt,
                            struct instruction *orig_insn,
                            struct instruction **new_insn)
  {
 -      static unsigned int alt_group_next_index = 1;
 -      struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
 -      unsigned int alt_group = alt_group_next_index++;
 +      struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL;
 +      struct alt_group *orig_alt_group, *new_alt_group;
        unsigned long dest_off;
  
 +
 +      orig_alt_group = malloc(sizeof(*orig_alt_group));
 +      if (!orig_alt_group) {
 +              WARN("malloc failed");
 +              return -1;
 +      }
 +      orig_alt_group->cfi = calloc(special_alt->orig_len,
 +                                   sizeof(struct cfi_state *));
 +      if (!orig_alt_group->cfi) {
 +              WARN("calloc failed");
 +              return -1;
 +      }
 +
        last_orig_insn = NULL;
        insn = orig_insn;
        sec_for_each_insn_from(file, insn) {
                if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
                        break;
  
 -              insn->alt_group = alt_group;
 +              insn->alt_group = orig_alt_group;
                last_orig_insn = insn;
        }
 +      orig_alt_group->orig_group = NULL;
 +      orig_alt_group->first_insn = orig_insn;
 +      orig_alt_group->last_insn = last_orig_insn;
 +
 +
 +      new_alt_group = malloc(sizeof(*new_alt_group));
 +      if (!new_alt_group) {
 +              WARN("malloc failed");
 +              return -1;
 +      }
  
 -      if (next_insn_same_sec(file, last_orig_insn)) {
 -              fake_jump = malloc(sizeof(*fake_jump));
 -              if (!fake_jump) {
 +      if (special_alt->new_len < special_alt->orig_len) {
 +              /*
 +               * Insert a fake nop at the end to make the replacement
 +               * alt_group the same size as the original.  This is needed to
 +               * allow propagate_alt_cfi() to do its magic.  When the last
 +               * instruction affects the stack, the instruction after it (the
 +               * nop) will propagate the new state to the shared CFI array.
 +               */
 +              nop = malloc(sizeof(*nop));
 +              if (!nop) {
                        WARN("malloc failed");
                        return -1;
                }
 -              memset(fake_jump, 0, sizeof(*fake_jump));
 -              INIT_LIST_HEAD(&fake_jump->alts);
 -              INIT_LIST_HEAD(&fake_jump->stack_ops);
 -              init_cfi_state(&fake_jump->cfi);
 +              memset(nop, 0, sizeof(*nop));
 +              INIT_LIST_HEAD(&nop->alts);
 +              INIT_LIST_HEAD(&nop->stack_ops);
 +              init_cfi_state(&nop->cfi);
  
 -              fake_jump->sec = special_alt->new_sec;
 -              fake_jump->offset = FAKE_JUMP_OFFSET;
 -              fake_jump->type = INSN_JUMP_UNCONDITIONAL;
 -              fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
 -              fake_jump->func = orig_insn->func;
 +              nop->sec = special_alt->new_sec;
 +              nop->offset = special_alt->new_off + special_alt->new_len;
 +              nop->len = special_alt->orig_len - special_alt->new_len;
 +              nop->type = INSN_NOP;
 +              nop->func = orig_insn->func;
 +              nop->alt_group = new_alt_group;
 +              nop->ignore = orig_insn->ignore_alts;
        }
  
        if (!special_alt->new_len) {
 -              if (!fake_jump) {
 -                      WARN("%s: empty alternative at end of section",
 -                           special_alt->orig_sec->name);
 -                      return -1;
 -              }
 -
 -              *new_insn = fake_jump;
 -              return 0;
 +              *new_insn = nop;
 +              goto end;
        }
  
 -      last_new_insn = NULL;
 -      alt_group = alt_group_next_index++;
        insn = *new_insn;
        sec_for_each_insn_from(file, insn) {
                struct reloc *alt_reloc;
  
                insn->ignore = orig_insn->ignore_alts;
                insn->func = orig_insn->func;
 -              insn->alt_group = alt_group;
 +              insn->alt_group = new_alt_group;
  
                /*
                 * Since alternative replacement code is copy/pasted by the
                        continue;
  
                dest_off = arch_jump_destination(insn);
 -              if (dest_off == special_alt->new_off + special_alt->new_len) {
 -                      if (!fake_jump) {
 -                              WARN("%s: alternative jump to end of section",
 -                                   special_alt->orig_sec->name);
 -                              return -1;
 -                      }
 -                      insn->jump_dest = fake_jump;
 -              }
 +              if (dest_off == special_alt->new_off + special_alt->new_len)
 +                      insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
  
                if (!insn->jump_dest) {
                        WARN_FUNC("can't find alternative jump destination",
                return -1;
        }
  
 -      if (fake_jump)
 -              list_add(&fake_jump->list, &last_new_insn->list);
 -
 +      if (nop)
 +              list_add(&nop->list, &last_new_insn->list);
 +end:
 +      new_alt_group->orig_group = orig_alt_group;
 +      new_alt_group->first_insn = *new_insn;
 +      new_alt_group->last_insn = nop ? : last_new_insn;
 +      new_alt_group->cfi = orig_alt_group->cfi;
        return 0;
  }
  
@@@ -1418,20 -1479,13 +1506,20 @@@ static int add_jump_table_alts(struct o
        return 0;
  }
  
 +static void set_func_state(struct cfi_state *state)
 +{
 +      state->cfa = initial_func_cfi.cfa;
 +      memcpy(&state->regs, &initial_func_cfi.regs,
 +             CFI_NUM_REGS * sizeof(struct cfi_reg));
 +      state->stack_size = initial_func_cfi.cfa.offset;
 +}
 +
  static int read_unwind_hints(struct objtool_file *file)
  {
        struct section *sec, *relocsec;
        struct reloc *reloc;
        struct unwind_hint *hint;
        struct instruction *insn;
 -      struct cfi_reg *cfa;
        int i;
  
        sec = find_section_by_name(file->elf, ".discard.unwind_hints");
                        return -1;
                }
  
 -              cfa = &insn->cfi.cfa;
 +              insn->hint = true;
  
 -              if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) {
 -                      insn->ret_offset = hint->sp_offset;
 +              if (hint->type == UNWIND_HINT_TYPE_FUNC) {
 +                      set_func_state(&insn->cfi);
                        continue;
                }
  
 -              insn->hint = true;
 -
                if (arch_decode_hint_reg(insn, hint->sp_reg)) {
                        WARN_FUNC("unsupported unwind_hint sp base reg %d",
                                  insn->sec, insn->offset, hint->sp_reg);
                        return -1;
                }
  
 -              cfa->offset = hint->sp_offset;
 +              insn->cfi.cfa.offset = bswap_if_needed(hint->sp_offset);
                insn->cfi.type = hint->type;
                insn->cfi.end = hint->end;
        }
@@@ -1735,18 -1791,27 +1823,18 @@@ static bool is_fentry_call(struct instr
  
  static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state)
  {
 -      u8 ret_offset = insn->ret_offset;
        struct cfi_state *cfi = &state->cfi;
        int i;
  
        if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap)
                return true;
  
 -      if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset)
 +      if (cfi->cfa.offset != initial_func_cfi.cfa.offset)
                return true;
  
 -      if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset)
 +      if (cfi->stack_size != initial_func_cfi.cfa.offset)
                return true;
  
 -      /*
 -       * If there is a ret offset hint then don't check registers
 -       * because a callee-saved register might have been pushed on
 -       * the stack.
 -       */
 -      if (ret_offset)
 -              return false;
 -
        for (i = 0; i < CFI_NUM_REGS; i++) {
                if (cfi->regs[i].base != initial_func_cfi.regs[i].base ||
                    cfi->regs[i].offset != initial_func_cfi.regs[i].offset)
        return false;
  }
  
 +static bool check_reg_frame_pos(const struct cfi_reg *reg,
 +                              int expected_offset)
 +{
 +      return reg->base == CFI_CFA &&
 +             reg->offset == expected_offset;
 +}
 +
  static bool has_valid_stack_frame(struct insn_state *state)
  {
        struct cfi_state *cfi = &state->cfi;
  
 -      if (cfi->cfa.base == CFI_BP && cfi->regs[CFI_BP].base == CFI_CFA &&
 -          cfi->regs[CFI_BP].offset == -16)
 +      if (cfi->cfa.base == CFI_BP &&
 +          check_reg_frame_pos(&cfi->regs[CFI_BP], -cfi->cfa.offset) &&
 +          check_reg_frame_pos(&cfi->regs[CFI_RA], -cfi->cfa.offset + 8))
                return true;
  
        if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP)
@@@ -1898,7 -1955,8 +1986,7 @@@ static int update_cfi_state(struct inst
                case OP_SRC_REG:
                        if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
                            cfa->base == CFI_SP &&
 -                          regs[CFI_BP].base == CFI_CFA &&
 -                          regs[CFI_BP].offset == -cfa->offset) {
 +                          check_reg_frame_pos(&regs[CFI_BP], -cfa->offset)) {
  
                                /* mov %rsp, %rbp */
                                cfa->base = op->dest.reg;
                                        cfa->offset = -cfi->vals[op->src.reg].offset;
                                        cfi->stack_size = cfa->offset;
  
 +                              } else if (cfa->base == CFI_SP &&
 +                                         cfi->vals[op->src.reg].base == CFI_SP_INDIRECT &&
 +                                         cfi->vals[op->src.reg].offset == cfa->offset) {
 +
 +                                      /*
 +                                       * Stack swizzle:
 +                                       *
 +                                       * 1: mov %rsp, (%[tos])
 +                                       * 2: mov %[tos], %rsp
 +                                       *    ...
 +                                       * 3: pop %rsp
 +                                       *
 +                                       * Where:
 +                                       *
 +                                       * 1 - places a pointer to the previous
 +                                       *     stack at the Top-of-Stack of the
 +                                       *     new stack.
 +                                       *
 +                                       * 2 - switches to the new stack.
 +                                       *
 +                                       * 3 - pops the Top-of-Stack to restore
 +                                       *     the original stack.
 +                                       *
 +                                       * Note: we set base to SP_INDIRECT
 +                                       * here and preserve offset. Therefore
 +                                       * when the unwinder reaches ToS it
 +                                       * will dereference SP and then add the
 +                                       * offset to find the next frame, IOW:
 +                                       * (%rsp) + offset.
 +                                       */
 +                                      cfa->base = CFI_SP_INDIRECT;
 +
                                } else {
                                        cfa->base = CFI_UNDEFINED;
                                        cfa->offset = 0;
                                break;
                        }
  
 +                      if (!cfi->drap && op->src.reg == CFI_SP &&
 +                          op->dest.reg == CFI_BP && cfa->base == CFI_SP &&
 +                          check_reg_frame_pos(&regs[CFI_BP], -cfa->offset + op->src.offset)) {
 +
 +                              /* lea disp(%rsp), %rbp */
 +                              cfa->base = CFI_BP;
 +                              cfa->offset -= op->src.offset;
 +                              cfi->bp_scratch = false;
 +                              break;
 +                      }
 +
                        if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
  
                                /* drap: lea disp(%rsp), %drap */
  
                case OP_SRC_POP:
                case OP_SRC_POPF:
 +                      if (op->dest.reg == CFI_SP && cfa->base == CFI_SP_INDIRECT) {
 +
 +                              /* pop %rsp; # restore from a stack swizzle */
 +                              cfa->base = CFI_SP;
 +                              break;
 +                      }
 +
                        if (!cfi->drap && op->dest.reg == cfa->base) {
  
                                /* pop %rbp */
                        break;
  
                case OP_SRC_REG_INDIRECT:
 +                      if (!cfi->drap && op->dest.reg == cfa->base &&
 +                          op->dest.reg == CFI_BP) {
 +
 +                              /* mov disp(%rsp), %rbp */
 +                              cfa->base = CFI_SP;
 +                              cfa->offset = cfi->stack_size;
 +                      }
 +
                        if (cfi->drap && op->src.reg == CFI_BP &&
                            op->src.offset == cfi->drap_offset) {
  
                                /* mov disp(%rbp), %reg */
                                /* mov disp(%rsp), %reg */
                                restore_reg(cfi, op->dest.reg);
 +
 +                      } else if (op->src.reg == CFI_SP &&
 +                                 op->src.offset == regs[op->dest.reg].offset + cfi->stack_size) {
 +
 +                              /* mov disp(%rsp), %reg */
 +                              restore_reg(cfi, op->dest.reg);
                        }
  
                        break;
                        /* mov reg, disp(%rsp) */
                        save_reg(cfi, op->src.reg, CFI_CFA,
                                 op->dest.offset - cfi->cfa.offset);
 +
 +              } else if (op->dest.reg == CFI_SP) {
 +
 +                      /* mov reg, disp(%rsp) */
 +                      save_reg(cfi, op->src.reg, CFI_CFA,
 +                               op->dest.offset - cfi->stack_size);
 +
 +              } else if (op->src.reg == CFI_SP && op->dest.offset == 0) {
 +
 +                      /* mov %rsp, (%reg); # setup a stack swizzle. */
 +                      cfi->vals[op->dest.reg].base = CFI_SP_INDIRECT;
 +                      cfi->vals[op->dest.reg].offset = cfa->offset;
                }
  
                break;
        return 0;
  }
  
 -static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
 +/*
 + * The stack layouts of alternatives instructions can sometimes diverge when
 + * they have stack modifications.  That's fine as long as the potential stack
 + * layouts don't conflict at any given potential instruction boundary.
 + *
 + * Flatten the CFIs of the different alternative code streams (both original
 + * and replacement) into a single shared CFI array which can be used to detect
 + * conflicts and nicely feed a linear array of ORC entries to the unwinder.
 + */
 +static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn)
  {
 -      struct stack_op *op;
 +      struct cfi_state **alt_cfi;
 +      int group_off;
  
 -      list_for_each_entry(op, &insn->stack_ops, list) {
 -              struct cfi_state old_cfi = state->cfi;
 -              int res;
 +      if (!insn->alt_group)
 +              return 0;
  
 -              res = update_cfi_state(insn, &state->cfi, op);
 -              if (res)
 -                      return res;
 +      alt_cfi = insn->alt_group->cfi;
 +      group_off = insn->offset - insn->alt_group->first_insn->offset;
  
 -              if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) {
 -                      WARN_FUNC("alternative modifies stack", insn->sec, insn->offset);
 +      if (!alt_cfi[group_off]) {
 +              alt_cfi[group_off] = &insn->cfi;
 +      } else {
 +              if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) {
 +                      WARN_FUNC("stack layout conflict in alternatives",
 +                                insn->sec, insn->offset);
                        return -1;
                }
 +      }
 +
 +      return 0;
 +}
 +
 +static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
 +{
 +      struct stack_op *op;
 +
 +      list_for_each_entry(op, &insn->stack_ops, list) {
 +
 +              if (update_cfi_state(insn, &state->cfi, op))
 +                      return 1;
  
                if (op->dest.type == OP_DEST_PUSHF) {
                        if (!state->uaccess_stack) {
@@@ -2528,20 -2485,28 +2616,20 @@@ static int validate_return(struct symbo
        return 0;
  }
  
 -/*
 - * Alternatives should not contain any ORC entries, this in turn means they
 - * should not contain any CFI ops, which implies all instructions should have
 - * the same same CFI state.
 - *
 - * It is possible to constuct alternatives that have unreachable holes that go
 - * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED
 - * states which then results in ORC entries, which we just said we didn't want.
 - *
 - * Avoid them by copying the CFI entry of the first instruction into the whole
 - * alternative.
 - */
 -static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
 +static struct instruction *next_insn_to_validate(struct objtool_file *file,
 +                                               struct instruction *insn)
  {
 -      struct instruction *first_insn = insn;
 -      int alt_group = insn->alt_group;
 +      struct alt_group *alt_group = insn->alt_group;
  
 -      sec_for_each_insn_continue(file, insn) {
 -              if (insn->alt_group != alt_group)
 -                      break;
 -              insn->cfi = first_insn->cfi;
 -      }
 +      /*
 +       * Simulate the fact that alternatives are patched in-place.  When the
 +       * end of a replacement alt_group is reached, redirect objtool flow to
 +       * the end of the original alt_group.
 +       */
 +      if (alt_group && insn == alt_group->last_insn && alt_group->orig_group)
 +              return next_insn_same_sec(file, alt_group->orig_group->last_insn);
 +
 +      return next_insn_same_sec(file, insn);
  }
  
  /*
@@@ -2562,7 -2527,7 +2650,7 @@@ static int validate_branch(struct objto
        sec = insn->sec;
  
        while (1) {
 -              next_insn = next_insn_same_sec(file, insn);
 +              next_insn = next_insn_to_validate(file, insn);
  
                if (file->c_file && func && insn->func && func != insn->func->pfunc) {
                        WARN("%s() falls through to next function %s()",
  
                insn->visited |= visited;
  
 +              if (propagate_alt_cfi(file, insn))
 +                      return 1;
 +
                if (!insn->ignore_alts && !list_empty(&insn->alts)) {
                        bool skip_orig = false;
  
                                }
                        }
  
 -                      if (insn->alt_group)
 -                              fill_alternative_cfi(file, insn);
 -
                        if (skip_orig)
                                return 0;
                }
  
                case INSN_JUMP_CONDITIONAL:
                case INSN_JUMP_UNCONDITIONAL:
 -                      if (func && is_sibling_call(insn)) {
 +                      if (is_sibling_call(insn)) {
                                ret = validate_sibling_call(insn, &state);
                                if (ret)
                                        return ret;
  
                case INSN_JUMP_DYNAMIC:
                case INSN_JUMP_DYNAMIC_CONDITIONAL:
 -                      if (func && is_sibling_call(insn)) {
 +                      if (is_sibling_call(insn)) {
                                ret = validate_sibling_call(insn, &state);
                                if (ret)
                                        return ret;
                        break;
  
                case INSN_STD:
 -                      if (state.df)
 +                      if (state.df) {
                                WARN_FUNC("recursive STD", sec, insn->offset);
 +                              return 1;
 +                      }
  
                        state.df = true;
                        break;
  
                case INSN_CLD:
 -                      if (!state.df && func)
 +                      if (!state.df && func) {
                                WARN_FUNC("redundant CLD", sec, insn->offset);
 +                              return 1;
 +                      }
  
                        state.df = false;
                        break;
@@@ -2850,6 -2811,9 +2938,6 @@@ static bool ignore_unreachable_insn(str
            !strcmp(insn->sec->name, ".altinstr_aux"))
                return true;
  
 -      if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET)
 -              return true;
 -
        if (!insn->func)
                return false;
  
@@@ -2935,7 -2899,10 +3023,7 @@@ static int validate_section(struct objt
                        continue;
  
                init_insn_state(&state, sec);
 -              state.cfi.cfa = initial_func_cfi.cfa;
 -              memcpy(&state.cfi.regs, &initial_func_cfi.regs,
 -                     CFI_NUM_REGS * sizeof(struct cfi_reg));
 -              state.cfi.stack_size = initial_func_cfi.cfa.offset;
 +              set_func_state(&state.cfi);
  
                warnings += validate_symbol(file, sec, func, &state);
        }
@@@ -3048,11 -3015,22 +3136,18 @@@ int check(struct objtool_file *file
                goto out;
        warnings += ret;
  
+       if (mcount) {
+               ret = create_mcount_loc_sections(file);
+               if (ret < 0)
+                       goto out;
+               warnings += ret;
+       }
  out:
 -      if (ret < 0) {
 -              /*
 -               *  Fatal error.  The binary is corrupt or otherwise broken in
 -               *  some way, or objtool itself is broken.  Fail the kernel
 -               *  build.
 -               */
 -              return ret;
 -      }
 -
 +      /*
 +       *  For now, don't fail the kernel build on fatal warnings.  These
 +       *  errors are still fairly common due to the growing matrix of
 +       *  supported toolchains and their recent pace of change.
 +       */
        return 0;
  }
@@@ -8,7 -8,7 +8,7 @@@
  #include <subcmd/parse-options.h>
  
  extern const struct option check_options[];
- extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
+ extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
  
  extern int cmd_check(int argc, const char **argv);
  extern int cmd_orc(int argc, const char **argv);
index 4891ead,0000000..f5be798
mode 100644,000000..100644
--- /dev/null
@@@ -1,93 -1,0 +1,94 @@@
 +/* SPDX-License-Identifier: GPL-2.0-or-later */
 +/*
 + * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
 + */
 +
 +#ifndef _CHECK_H
 +#define _CHECK_H
 +
 +#include <stdbool.h>
 +#include <objtool/cfi.h>
 +#include <objtool/arch.h>
 +
 +struct insn_state {
 +      struct cfi_state cfi;
 +      unsigned int uaccess_stack;
 +      bool uaccess;
 +      bool df;
 +      bool noinstr;
 +      s8 instr;
 +};
 +
 +struct alt_group {
 +      /*
 +       * Pointer from a replacement group to the original group.  NULL if it
 +       * *is* the original group.
 +       */
 +      struct alt_group *orig_group;
 +
 +      /* First and last instructions in the group */
 +      struct instruction *first_insn, *last_insn;
 +
 +      /*
 +       * Byte-offset-addressed len-sized array of pointers to CFI structs.
 +       * This is shared with the other alt_groups in the same alternative.
 +       */
 +      struct cfi_state **cfi;
 +};
 +
 +struct instruction {
 +      struct list_head list;
 +      struct hlist_node hash;
 +      struct list_head static_call_node;
++      struct list_head mcount_loc_node;
 +      struct section *sec;
 +      unsigned long offset;
 +      unsigned int len;
 +      enum insn_type type;
 +      unsigned long immediate;
 +      bool dead_end, ignore, ignore_alts;
 +      bool hint;
 +      bool retpoline_safe;
 +      s8 instr;
 +      u8 visited;
 +      struct alt_group *alt_group;
 +      struct symbol *call_dest;
 +      struct instruction *jump_dest;
 +      struct instruction *first_jump_src;
 +      struct reloc *jump_table;
 +      struct list_head alts;
 +      struct symbol *func;
 +      struct list_head stack_ops;
 +      struct cfi_state cfi;
 +};
 +
 +static inline bool is_static_jump(struct instruction *insn)
 +{
 +      return insn->type == INSN_JUMP_CONDITIONAL ||
 +             insn->type == INSN_JUMP_UNCONDITIONAL;
 +}
 +
 +static inline bool is_dynamic_jump(struct instruction *insn)
 +{
 +      return insn->type == INSN_JUMP_DYNAMIC ||
 +             insn->type == INSN_JUMP_DYNAMIC_CONDITIONAL;
 +}
 +
 +static inline bool is_jump(struct instruction *insn)
 +{
 +      return is_static_jump(insn) || is_dynamic_jump(insn);
 +}
 +
 +struct instruction *find_insn(struct objtool_file *file,
 +                            struct section *sec, unsigned long offset);
 +
 +#define for_each_insn(file, insn)                                     \
 +      list_for_each_entry(insn, &file->insn_list, list)
 +
 +#define sec_for_each_insn(file, sec, insn)                            \
 +      for (insn = find_insn(file, sec, 0);                            \
 +           insn && &insn->list != &file->insn_list &&                 \
 +                      insn->sec == sec;                               \
 +           insn = list_next_entry(insn, list))
 +
 +#endif /* _CHECK_H */
index e114642,0000000..e68e374
mode 100644,000000..100644
--- /dev/null
@@@ -1,31 -1,0 +1,32 @@@
 +/* SPDX-License-Identifier: GPL-2.0-or-later */
 +/*
 + * Copyright (C) 2020 Matt Helsley <mhelsley@vmware.com>
 + */
 +
 +#ifndef _OBJTOOL_H
 +#define _OBJTOOL_H
 +
 +#include <stdbool.h>
 +#include <linux/list.h>
 +#include <linux/hashtable.h>
 +
 +#include <objtool/elf.h>
 +
 +#define __weak __attribute__((weak))
 +
 +struct objtool_file {
 +      struct elf *elf;
 +      struct list_head insn_list;
 +      DECLARE_HASHTABLE(insn_hash, 20);
 +      struct list_head static_call_list;
++      struct list_head mcount_loc_list;
 +      bool ignore_unreachables, c_file, hints, rodata;
 +};
 +
 +struct objtool_file *objtool_open_read(const char *_objname);
 +
 +int check(struct objtool_file *file);
 +int orc_dump(const char *objname);
 +int orc_create(struct objtool_file *file);
 +
 +#endif /* _OBJTOOL_H */
diff --combined tools/objtool/objtool.c
@@@ -21,9 -21,9 +21,9 @@@
  #include <subcmd/pager.h>
  #include <linux/kernel.h>
  
 -#include "builtin.h"
 -#include "objtool.h"
 -#include "warn.h"
 +#include <objtool/builtin.h>
 +#include <objtool/objtool.h>
 +#include <objtool/warn.h>
  
  struct cmd_struct {
        const char *name;
@@@ -62,6 -62,7 +62,7 @@@ struct objtool_file *objtool_open_read(
        INIT_LIST_HEAD(&file.insn_list);
        hash_init(file.insn_hash);
        INIT_LIST_HEAD(&file.static_call_list);
+       INIT_LIST_HEAD(&file.mcount_loc_list);
        file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
        file.ignore_unreachables = no_unreachable;
        file.hints = false;