x86/static_call: Add inline static call implementation for x86-64
authorJosh Poimboeuf <jpoimboe@redhat.com>
Tue, 18 Aug 2020 13:57:45 +0000 (15:57 +0200)
committerIngo Molnar <mingo@kernel.org>
Tue, 1 Sep 2020 07:58:05 +0000 (09:58 +0200)
Add the inline static call implementation for x86-64. The generated code
is identical to the out-of-line case, except we move the trampoline into
its own section.

Objtool uses the trampoline naming convention to detect all the call
sites. It then annotates those call sites in the .static_call_sites
section.

During boot (and module init), the call sites are patched to call
directly into the destination function.  The temporary trampoline is
then no longer used.

[peterz: merged trampolines, put trampoline in section]

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20200818135804.864271425@infradead.org
13 files changed:
arch/x86/Kconfig
arch/x86/include/asm/static_call.h
arch/x86/kernel/static_call.c
arch/x86/kernel/vmlinux.lds.S
include/asm-generic/vmlinux.lds.h
tools/include/linux/static_call_types.h [new file with mode: 0644]
tools/objtool/check.c
tools/objtool/check.h
tools/objtool/elf.c
tools/objtool/elf.h
tools/objtool/objtool.h
tools/objtool/orc_gen.c
tools/objtool/sync-check.sh

index 595c06b32b3a4c6089931fffec9cb7e653ada4c2..8a48d3eedb8475ff04abcae4e8cab6b45076a8ef 100644 (file)
@@ -216,6 +216,7 @@ config X86
        select HAVE_STACKPROTECTOR              if CC_HAS_SANE_STACKPROTECTOR
        select HAVE_STACK_VALIDATION            if X86_64
        select HAVE_STATIC_CALL
+       select HAVE_STATIC_CALL_INLINE          if HAVE_STACK_VALIDATION
        select HAVE_RSEQ
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_UNSTABLE_SCHED_CLOCK
@@ -231,6 +232,7 @@ config X86
        select RTC_MC146818_LIB
        select SPARSE_IRQ
        select SRCU
+       select STACK_VALIDATION                 if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE)
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
        select USER_STACKTRACE_SUPPORT
@@ -452,7 +454,6 @@ config GOLDFISH
 config RETPOLINE
        bool "Avoid speculative indirect branches in kernel"
        default y
-       select STACK_VALIDATION if HAVE_STACK_VALIDATION
        help
          Compile kernel with the retpoline compiler options to guard against
          kernel-to-user data leaks by avoiding speculative indirect
index 07aa8791cbfe1cdb98f3cbcd5091f880483ead11..33469ae3612cbf8b3b76a22818054dc168833be1 100644 (file)
@@ -5,12 +5,23 @@
 #include <asm/text-patching.h>
 
 /*
+ * For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which
+ * uses the current value of the key->func pointer to do an indirect jump to
+ * the function.  This trampoline is only used during boot, before the call
+ * sites get patched by static_call_update().  The name of this trampoline has
+ * a magical aspect: objtool uses it to find static call sites so it can create
+ * the .static_call_sites section.
+ *
  * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which
  * does a direct jump to the function.  The direct jump gets patched by
  * static_call_update().
+ *
+ * Having the trampoline in a special section forces GCC to emit a JMP.d32 when
+ * it does tail-call optimization on the call, because the relative
+ * displacement cannot be computed across sections.
  */
 #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)                      \
-       asm(".pushsection .text, \"ax\"                         \n"     \
+       asm(".pushsection .static_call.text, \"ax\"             \n"     \
            ".align 4                                           \n"     \
            ".globl " STATIC_CALL_TRAMP_STR(name) "             \n"     \
            STATIC_CALL_TRAMP_STR(name) ":                      \n"     \
index 0565825970af6e7cb6c5e797c4661b19a14d127d..5ff2b639a1a637a0db9fb85ee9dd7616b47b6889 100644 (file)
@@ -26,6 +26,9 @@ void arch_static_call_transform(void *site, void *tramp, void *func)
        if (tramp)
                __static_call_transform(tramp, JMP32_INSN_OPCODE, func);
 
+       if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
+               __static_call_transform(site, CALL_INSN_OPCODE, func);
+
        mutex_unlock(&text_mutex);
 }
 EXPORT_SYMBOL_GPL(arch_static_call_transform);
index 9a03e5b23135af92649938ea8a9dfc1c0443f21b..2568f4cdcbd10c8efc724c65aeac02f2239c0f5b 100644 (file)
@@ -136,6 +136,7 @@ SECTIONS
                ENTRY_TEXT
                ALIGN_ENTRY_TEXT_END
                SOFTIRQENTRY_TEXT
+               STATIC_CALL_TEXT
                *(.fixup)
                *(.gnu.warning)
 
index 0088a5cd6a40c98681104b6e4c683f7f943a6618..0502087654d775ee417663c0712c7268b4162393 100644 (file)
                *(.softirqentry.text)                                   \
                __softirqentry_text_end = .;
 
+#define STATIC_CALL_TEXT                                               \
+               ALIGN_FUNCTION();                                       \
+               __static_call_text_start = .;                           \
+               *(.static_call.text)                                    \
+               __static_call_text_end = .;
+
 /* Section used for early init (in .S files) */
 #define HEAD_TEXT  KEEP(*(.head.text))
 
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
new file mode 100644 (file)
index 0000000..408d345
--- /dev/null
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _STATIC_CALL_TYPES_H
+#define _STATIC_CALL_TYPES_H
+
+#include <linux/types.h>
+#include <linux/stringify.h>
+
+#define STATIC_CALL_KEY_PREFIX         __SCK__
+#define STATIC_CALL_KEY_PREFIX_STR     __stringify(STATIC_CALL_KEY_PREFIX)
+#define STATIC_CALL_KEY_PREFIX_LEN     (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1)
+#define STATIC_CALL_KEY(name)          __PASTE(STATIC_CALL_KEY_PREFIX, name)
+
+#define STATIC_CALL_TRAMP_PREFIX       __SCT__
+#define STATIC_CALL_TRAMP_PREFIX_STR   __stringify(STATIC_CALL_TRAMP_PREFIX)
+#define STATIC_CALL_TRAMP_PREFIX_LEN   (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1)
+#define STATIC_CALL_TRAMP(name)                __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
+#define STATIC_CALL_TRAMP_STR(name)    __stringify(STATIC_CALL_TRAMP(name))
+
+/*
+ * The static call site table needs to be created by external tooling (objtool
+ * or a compiler plugin).
+ */
+struct static_call_site {
+       s32 addr;       /* objtool: R_X86_64_PC32 reloc to the call instruction */
+       s32 key;        /* objtool: R_X86_64_PC32 reloc to the static_call_key symbol */
+};
+
+#endif /* _STATIC_CALL_TYPES_H */
index e034a8f24f46689a113a3fde6d6c1355e399c69c..f8f7a40c6ef359b6c24c55d17ac01f5f73167000 100644 (file)
@@ -16,6 +16,7 @@
 
 #include <linux/hashtable.h>
 #include <linux/kernel.h>
+#include <linux/static_call_types.h>
 
 #define FAKE_JUMP_OFFSET -1
 
@@ -433,6 +434,103 @@ reachable:
        return 0;
 }
 
+/*
+ * Emit the .static_call_sites section (and its RELA section) describing every
+ * call instruction collected on file->static_call_list.
+ *
+ * Each entry is a struct static_call_site whose two 32-bit fields are filled
+ * in through R_X86_64_PC32 relocations: 'addr' refers to the call instruction
+ * and 'key' to the key symbol, whose name is derived from the trampoline name
+ * by replacing the trampoline prefix with the key prefix.
+ *
+ * Returns 0 on success (including when there are no static call sites or the
+ * section already exists), -1 on error.
+ */
+static int create_static_call_sections(struct objtool_file *file)
+{
+       struct section *sec, *reloc_sec;
+       struct reloc *reloc;
+       struct static_call_site *site;
+       struct instruction *insn;
+       struct symbol *key_sym;
+       char *key_name, *tmp;
+       int idx;
+
+       sec = find_section_by_name(file->elf, ".static_call_sites");
+       if (sec) {
+               /* Drop any collected sites; the existing section is kept as is. */
+               INIT_LIST_HEAD(&file->static_call_list);
+               WARN("file already has .static_call_sites section, skipping");
+               return 0;
+       }
+
+       if (list_empty(&file->static_call_list))
+               return 0;
+
+       /* Count the call sites so the section can be sized up front. */
+       idx = 0;
+       list_for_each_entry(insn, &file->static_call_list, static_call_node)
+               idx++;
+
+       sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
+                                sizeof(struct static_call_site), idx);
+       if (!sec)
+               return -1;
+
+       reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+       if (!reloc_sec)
+               return -1;
+
+       idx = 0;
+       list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+
+               site = (struct static_call_site *)sec->data->d_buf + idx;
+               memset(site, 0, sizeof(struct static_call_site));
+
+               /* populate reloc for 'addr' */
+               reloc = malloc(sizeof(*reloc));
+               if (!reloc) {
+                       perror("malloc");
+                       return -1;
+               }
+               memset(reloc, 0, sizeof(*reloc));
+               reloc->sym = insn->sec->sym;
+               reloc->addend = insn->offset;
+               reloc->type = R_X86_64_PC32;
+               reloc->offset = idx * sizeof(struct static_call_site);
+               reloc->sec = reloc_sec;
+               elf_add_reloc(file->elf, reloc);
+
+               /* find key symbol */
+               key_name = strdup(insn->call_dest->name);
+               if (!key_name) {
+                       perror("strdup");
+                       return -1;
+               }
+               if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR,
+                           STATIC_CALL_TRAMP_PREFIX_LEN)) {
+                       WARN("static_call: trampoline name malformed: %s", key_name);
+                       free(key_name);
+                       return -1;
+               }
+               /* Overwrite the tail of the trampoline prefix with the key prefix. */
+               tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN;
+               memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN);
+
+               key_sym = find_symbol_by_name(file->elf, tmp);
+               if (!key_sym) {
+                       /* 'tmp' points into 'key_name': warn first, then free. */
+                       WARN("static_call: can't find static_call_key symbol: %s", tmp);
+                       free(key_name);
+                       return -1;
+               }
+               free(key_name);
+
+               /* populate reloc for 'key' */
+               reloc = malloc(sizeof(*reloc));
+               if (!reloc) {
+                       perror("malloc");
+                       return -1;
+               }
+               memset(reloc, 0, sizeof(*reloc));
+               reloc->sym = key_sym;
+               reloc->addend = 0;
+               reloc->type = R_X86_64_PC32;
+               /* 'key' sits right after the 4-byte 'addr' field. */
+               reloc->offset = idx * sizeof(struct static_call_site) + 4;
+               reloc->sec = reloc_sec;
+               elf_add_reloc(file->elf, reloc);
+
+               idx++;
+       }
+
+       if (elf_rebuild_reloc_section(file->elf, reloc_sec))
+               return -1;
+
+       return 0;
+}
+
 /*
  * Warnings shouldn't be reported for ignored functions.
  */
@@ -1522,6 +1620,23 @@ static int read_intra_function_calls(struct objtool_file *file)
        return 0;
 }
 
+/*
+ * Flag every global symbol whose name starts with the static call trampoline
+ * prefix, so that calls to it can later be recognised as static call sites.
+ * Always returns 0.
+ */
+static int read_static_call_tramps(struct objtool_file *file)
+{
+       struct section *sec;
+       struct symbol *func;
+
+       for_each_sec(file, sec) {
+               list_for_each_entry(func, &sec->symbol_list, list) {
+                       /* Prefix length is a compile-time constant; no strlen() per symbol. */
+                       if (func->bind == STB_GLOBAL &&
+                           !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
+                                    STATIC_CALL_TRAMP_PREFIX_LEN))
+                               func->static_call_tramp = true;
+               }
+       }
+
+       return 0;
+}
+
 static void mark_rodata(struct objtool_file *file)
 {
        struct section *sec;
@@ -1601,6 +1716,10 @@ static int decode_sections(struct objtool_file *file)
        if (ret)
                return ret;
 
+       ret = read_static_call_tramps(file);
+       if (ret)
+               return ret;
+
        return 0;
 }
 
@@ -2432,6 +2551,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
                        if (dead_end_function(file, insn->call_dest))
                                return 0;
 
+                       if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) {
+                               list_add_tail(&insn->static_call_node,
+                                             &file->static_call_list);
+                       }
+
                        break;
 
                case INSN_JUMP_CONDITIONAL:
@@ -2791,6 +2915,7 @@ int check(const char *_objname, bool orc)
 
        INIT_LIST_HEAD(&file.insn_list);
        hash_init(file.insn_hash);
+       INIT_LIST_HEAD(&file.static_call_list);
        file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
        file.ignore_unreachables = no_unreachable;
        file.hints = false;
@@ -2838,6 +2963,11 @@ int check(const char *_objname, bool orc)
                warnings += ret;
        }
 
+       ret = create_static_call_sections(&file);
+       if (ret < 0)
+               goto out;
+       warnings += ret;
+
        if (orc) {
                ret = create_orc(&file);
                if (ret < 0)
index 061aa96e15d37a3f7c143cb0d47f32a58f06ae19..36d38b9153acefe511cfacb7c5d4b100195cf214 100644 (file)
@@ -22,6 +22,7 @@ struct insn_state {
 struct instruction {
        struct list_head list;
        struct hlist_node hash;
+       struct list_head static_call_node;
        struct section *sec;
        unsigned long offset;
        unsigned int len;
index 3ddbd66f1a37611dd0021c8017755a814025f34a..4e1d7460574b4ab6a532f4e68fe0208b1e9eaf7a 100644 (file)
@@ -652,7 +652,7 @@ err:
 }
 
 struct section *elf_create_section(struct elf *elf, const char *name,
-                                  size_t entsize, int nr)
+                                  unsigned int sh_flags, size_t entsize, int nr)
 {
        struct section *sec, *shstrtab;
        size_t size = entsize * nr;
@@ -712,7 +712,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
        sec->sh.sh_entsize = entsize;
        sec->sh.sh_type = SHT_PROGBITS;
        sec->sh.sh_addralign = 1;
-       sec->sh.sh_flags = SHF_ALLOC;
+       sec->sh.sh_flags = SHF_ALLOC | sh_flags;
 
 
        /* Add section name to .shstrtab (or .strtab for Clang) */
@@ -767,7 +767,7 @@ static struct section *elf_create_rel_reloc_section(struct elf *elf, struct sect
        strcpy(relocname, ".rel");
        strcat(relocname, base->name);
 
-       sec = elf_create_section(elf, relocname, sizeof(GElf_Rel), 0);
+       sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0);
        free(relocname);
        if (!sec)
                return NULL;
@@ -797,7 +797,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec
        strcpy(relocname, ".rela");
        strcat(relocname, base->name);
 
-       sec = elf_create_section(elf, relocname, sizeof(GElf_Rela), 0);
+       sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0);
        free(relocname);
        if (!sec)
                return NULL;
index 6cc80a0751668546c7dce140967c5b31181f9108..807f8c670097473b569ba015cb64e79c6f3da45d 100644 (file)
@@ -56,6 +56,7 @@ struct symbol {
        unsigned int len;
        struct symbol *pfunc, *cfunc, *alias;
        bool uaccess_safe;
+       bool static_call_tramp;
 };
 
 struct reloc {
@@ -120,7 +121,7 @@ static inline u32 reloc_hash(struct reloc *reloc)
 }
 
 struct elf *elf_open_read(const char *name, int flags);
-struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr);
+struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
 struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
 void elf_add_reloc(struct elf *elf, struct reloc *reloc);
 int elf_write_insn(struct elf *elf, struct section *sec,
index 528028a668165e89947b0b719c5d0739a8e85963..9a7cd0b88bd83e1f3e4668b7b998cc0e8c1d2a56 100644 (file)
@@ -16,6 +16,7 @@ struct objtool_file {
        struct elf *elf;
        struct list_head insn_list;
        DECLARE_HASHTABLE(insn_hash, 20);
+       struct list_head static_call_list;
        bool ignore_unreachables, c_file, hints, rodata;
 };
 
index 968f55e6dd94d45bdcb7c8abcf1f5748f5ad0f81..e6b2363c2e03b961ed09c4d4ac6d2486c7efe343 100644 (file)
@@ -177,7 +177,7 @@ int create_orc_sections(struct objtool_file *file)
 
 
        /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
-       sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
+       sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx);
        if (!sec)
                return -1;
 
@@ -186,7 +186,7 @@ int create_orc_sections(struct objtool_file *file)
                return -1;
 
        /* create .orc_unwind section */
-       u_sec = elf_create_section(file->elf, ".orc_unwind",
+       u_sec = elf_create_section(file->elf, ".orc_unwind", 0,
                                   sizeof(struct orc_entry), idx);
 
        /* populate sections */
index 2a1261bfbb625302d81a8fa2e26efb1cb91ffd9d..aa099b21dffa6ee024595b66ff021f9102bb0c16 100755 (executable)
@@ -7,6 +7,7 @@ arch/x86/include/asm/orc_types.h
 arch/x86/include/asm/emulate_prefix.h
 arch/x86/lib/x86-opcode-map.txt
 arch/x86/tools/gen-insn-attr-x86.awk
+include/linux/static_call_types.h
 '
 
 check_2 () {