static_call: Add inline static call infrastructure
author: Josh Poimboeuf <jpoimboe@redhat.com>
Tue, 18 Aug 2020 13:57:42 +0000 (15:57 +0200)
committer: Ingo Molnar <mingo@kernel.org>
Tue, 1 Sep 2020 07:58:04 +0000 (09:58 +0200)
Add infrastructure for an arch-specific CONFIG_HAVE_STATIC_CALL_INLINE
option, which is a faster version of CONFIG_HAVE_STATIC_CALL.  At
runtime, the static call sites are patched directly, rather than using
the out-of-line trampolines.

Compared to out-of-line static calls, the performance benefits are more
modest, but still measurable.  Steven Rostedt did some tracepoint
measurements:

  https://lkml.kernel.org/r/20181126155405.72b4f718@gandalf.local.home

This code is heavily inspired by the jump label code (aka "static
jumps"), as some of the concepts are very similar.

For more details, see the comments in include/linux/static_call.h.

[peterz: simplified interface; merged trampolines]

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20200818135804.684334440@infradead.org
arch/Kconfig
include/asm-generic/vmlinux.lds.h
include/linux/module.h
include/linux/static_call.h
include/linux/static_call_types.h
kernel/Makefile
kernel/module.c
kernel/static_call.c [new file with mode: 0644]

index 806e6df7b47cc49a9ab6661d79d577a9e4a1473c..2c4936ac620ae59ba85cb0e468b27bc4f2302681 100644 (file)
@@ -978,6 +978,10 @@ config ARCH_HAS_VDSO_DATA
 config HAVE_STATIC_CALL
        bool
 
+config HAVE_STATIC_CALL_INLINE
+       bool
+       depends on HAVE_STATIC_CALL
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
index 5430febd34beecea9e6c50126b78c9fb7a965758..0088a5cd6a40c98681104b6e4c683f7f943a6618 100644 (file)
        KEEP(*(__jump_table))                                           \
        __stop___jump_table = .;
 
+#define STATIC_CALL_DATA                                               \
+       . = ALIGN(8);                                                   \
+       __start_static_call_sites = .;                                  \
+       KEEP(*(.static_call_sites))                                     \
+       __stop_static_call_sites = .;
+
 /*
  * Allow architectures to handle ro_after_init data on their
  * own by defining an empty RO_AFTER_INIT_DATA.
        __start_ro_after_init = .;                                      \
        *(.data..ro_after_init)                                         \
        JUMP_TABLE_DATA                                                 \
+       STATIC_CALL_DATA                                                \
        __end_ro_after_init = .;
 #endif
 
index e30ed5fa33a7383a6d123d6dcb3eb59d6c813900..a29187f7c360e0ffb0ab6b9bcad8b3f0decfd3d1 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/error-injection.h>
 #include <linux/tracepoint-defs.h>
 #include <linux/srcu.h>
+#include <linux/static_call_types.h>
 
 #include <linux/percpu.h>
 #include <asm/module.h>
@@ -498,6 +499,10 @@ struct module {
        unsigned long *kprobe_blacklist;
        unsigned int num_kprobe_blacklist;
 #endif
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+       int num_static_call_sites;
+       struct static_call_site *static_call_sites;
+#endif
 
 #ifdef CONFIG_LIVEPATCH
        bool klp; /* Is this a livepatch module? */
index d8892dff2e91f80c4aa83933742f6ea6b32c45cf..0d7f9efaa3b2592b650854d00401b6e0c8cefec4 100644 (file)
@@ -95,7 +95,41 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func);
                             STATIC_CALL_TRAMP_ADDR(name), func);       \
 })
 
-#if defined(CONFIG_HAVE_STATIC_CALL)
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+
+struct static_call_mod {
+       struct static_call_mod *next; /* next entry on the owning key's ->mods list */
+       struct module *mod; /* for vmlinux, mod == NULL */
+       struct static_call_site *sites; /* first site for the key in this module's sorted site table */
+};
+
+struct static_call_key {
+       void *func; /* function the call sites are currently patched to call */
+       struct static_call_mod *mods; /* list of per-module site records; NULL when defined */
+};
+
+extern void __static_call_update(struct static_call_key *key, void *tramp, void *func);
+extern int static_call_mod_init(struct module *mod);
+
+#define DEFINE_STATIC_CALL(name, _func)                                        \
+       DECLARE_STATIC_CALL(name, _func);                               \
+       struct static_call_key STATIC_CALL_KEY(name) = {                \
+               .func = _func,                                          \
+               .mods = NULL,                                           \
+       };                                                              \
+       ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)
+
+#define static_call(name)      __static_call(name)
+
+#define EXPORT_STATIC_CALL(name)                                       \
+       EXPORT_SYMBOL(STATIC_CALL_KEY(name));                           \
+       EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
+
+#define EXPORT_STATIC_CALL_GPL(name)                                   \
+       EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name));                       \
+       EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))
+
+#elif defined(CONFIG_HAVE_STATIC_CALL)
 
 struct static_call_key {
        void *func;
index 5ed249dc47d304001c8ec705175bdd723a02002b..408d345d83e1c80c80f385cd38ab4ec039a21774 100644 (file)
@@ -2,14 +2,27 @@
 #ifndef _STATIC_CALL_TYPES_H
 #define _STATIC_CALL_TYPES_H
 
+#include <linux/types.h>
 #include <linux/stringify.h>
 
 #define STATIC_CALL_KEY_PREFIX         __SCK__
+#define STATIC_CALL_KEY_PREFIX_STR     __stringify(STATIC_CALL_KEY_PREFIX)
+#define STATIC_CALL_KEY_PREFIX_LEN     (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1)
 #define STATIC_CALL_KEY(name)          __PASTE(STATIC_CALL_KEY_PREFIX, name)
 
 #define STATIC_CALL_TRAMP_PREFIX       __SCT__
 #define STATIC_CALL_TRAMP_PREFIX_STR   __stringify(STATIC_CALL_TRAMP_PREFIX)
+#define STATIC_CALL_TRAMP_PREFIX_LEN   (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1)
 #define STATIC_CALL_TRAMP(name)                __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
 #define STATIC_CALL_TRAMP_STR(name)    __stringify(STATIC_CALL_TRAMP(name))
 
+/*
+ * The static call site table needs to be created by external tooling (objtool
+ * or a compiler plugin).
+ *
+ * kernel/static_call.c decodes each field by adding the field's own address
+ * to its value, i.e. both fields are self-relative 32-bit offsets. The low
+ * bit of the decoded key address is used there as an "init section" flag.
+ */
+struct static_call_site {
+       s32 addr;
+       s32 key;
+};
+
 #endif /* _STATIC_CALL_TYPES_H */
index 9a20016d4900d12cbdbd400018b50b288b3c92ef..b74820d8b26437cc0da61853a1c9cc1f4692e9e6 100644 (file)
@@ -111,6 +111,7 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o
 obj-$(CONFIG_BPF) += bpf/
 obj-$(CONFIG_KCSAN) += kcsan/
 obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
+obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o
 
 obj-$(CONFIG_PERF_EVENTS) += events/
 
index 3c465cf31d0804c512beca4106ff36b598897564..c075a18103fbf66646baccabfe351c5e13b9573c 100644 (file)
@@ -3274,6 +3274,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
        mod->kprobe_blacklist = section_objs(info, "_kprobe_blacklist",
                                                sizeof(unsigned long),
                                                &mod->num_kprobe_blacklist);
+#endif
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+       mod->static_call_sites = section_objs(info, ".static_call_sites",
+                                             sizeof(*mod->static_call_sites),
+                                             &mod->num_static_call_sites);
 #endif
        mod->extable = section_objs(info, "__ex_table",
                                    sizeof(*mod->extable), &mod->num_exentries);
diff --git a/kernel/static_call.c b/kernel/static_call.c
new file mode 100644 (file)
index 0000000..d243492
--- /dev/null
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/static_call.h>
+#include <linux/bug.h>
+#include <linux/smp.h>
+#include <linux/sort.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/processor.h>
+#include <asm/sections.h>
+
+extern struct static_call_site __start_static_call_sites[],
+                              __stop_static_call_sites[]; /* bounds of the built-in .static_call_sites table */
+
+static bool static_call_initialized; /* set by static_call_init() once the built-in sites are registered */
+
+#define STATIC_CALL_INIT 1UL /* flag kept in the low bit of a site's decoded key address */
+
+/* mutex to protect key modules/sites */
+static DEFINE_MUTEX(static_call_mutex);
+
+static void static_call_lock(void)
+{
+       mutex_lock(&static_call_mutex); /* serializes updates to keys and their site lists */
+}
+
+static void static_call_unlock(void)
+{
+       mutex_unlock(&static_call_mutex); /* pairs with static_call_lock() */
+}
+
+/* Turn the self-relative 'addr' offset of @site into the call site address. */
+static inline void *static_call_addr(struct static_call_site *site)
+{
+       long field = (long)&site->addr;
+       long offset = (long)site->addr;
+
+       return (void *)(offset + field);
+}
+
+
+/*
+ * Turn the self-relative 'key' offset of @site into a key pointer, with the
+ * STATIC_CALL_INIT flag bit masked off.
+ */
+static inline struct static_call_key *static_call_key(const struct static_call_site *site)
+{
+       long field = (long)&site->key;
+       long offset = (long)site->key;
+
+       return (struct static_call_key *)((offset + field) & ~STATIC_CALL_INIT);
+}
+
+/*
+ * The init-flag helpers assume the key is word-aligned, which leaves the low
+ * bit of its address free to carry STATIC_CALL_INIT.
+ */
+static inline bool static_call_is_init(struct static_call_site *site)
+{
+       long field = (long)&site->key;
+       long offset = (long)site->key;
+
+       /* Non-zero when the flag bit is set in the decoded key address. */
+       return (offset + field) & STATIC_CALL_INIT;
+}
+
+/* Re-encode @site's key offset with the STATIC_CALL_INIT flag bit set. */
+static inline void static_call_set_init(struct static_call_site *site)
+{
+       unsigned long tagged = (long)static_call_key(site) | STATIC_CALL_INIT;
+
+       site->key = tagged - (long)&site->key;
+}
+
+/* sort() comparator: order sites by the address of the key they refer to. */
+static int static_call_site_cmp(const void *_a, const void *_b)
+{
+       const struct static_call_key *lhs = static_call_key(_a);
+       const struct static_call_key *rhs = static_call_key(_b);
+
+       /* Yields -1, 0 or 1 for lhs below, equal to, or above rhs. */
+       return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * sort() swap callback. The fields are offsets relative to their own
+ * location, so an entry that moves has its offsets adjusted by the distance
+ * between the two slots.
+ */
+static void static_call_site_swap(void *_a, void *_b, int size)
+{
+       struct static_call_site *first = _a;
+       struct static_call_site *second = _b;
+       long delta = (unsigned long)_a - (unsigned long)_b;
+       s32 first_addr = first->addr;
+       s32 first_key = first->key;
+
+       first->addr = second->addr - delta;
+       first->key  = second->key  - delta;
+
+       second->addr = first_addr + delta;
+       second->key  = first_key  + delta;
+}
+
+/* Sort the site table [start, stop) by key so that each key's sites are adjacent. */
+static inline void static_call_sort_entries(struct static_call_site *start,
+                                           struct static_call_site *stop)
+{
+       size_t nr_sites = stop - start;
+
+       sort(start, nr_sites, sizeof(*start),
+            static_call_site_cmp, static_call_site_swap);
+}
+
+void __static_call_update(struct static_call_key *key, void *tramp, void *func)
+{
+       struct static_call_site *site, *stop;
+       struct static_call_mod *site_mod;
+
+       cpus_read_lock();
+       static_call_lock();
+
+       if (key->func == func) /* already targeting func: nothing to patch */
+               goto done;
+
+       key->func = func;
+
+       arch_static_call_transform(NULL, tramp, func); /* retarget the trampoline (no site given) */
+
+       /*
+        * If uninitialized, we'll not update the callsites, but they still
+        * point to the trampoline and we just patched that.
+        */
+       if (WARN_ON_ONCE(!static_call_initialized))
+               goto done;
+
+       for (site_mod = key->mods; site_mod; site_mod = site_mod->next) {
+               struct module *mod = site_mod->mod;
+
+               if (!site_mod->sites) {
+                       /*
+                        * This can happen if the static call key is defined in
+                        * a module which doesn't use it.
+                        */
+                       continue;
+               }
+
+               stop = __stop_static_call_sites; /* default bound: end of the built-in table */
+
+#ifdef CONFIG_MODULES
+               if (mod) {
+                       stop = mod->static_call_sites +
+                              mod->num_static_call_sites;
+               }
+#endif
+
+               for (site = site_mod->sites; /* tables are sorted by key, so this key's sites are contiguous */
+                    site < stop && static_call_key(site) == key; site++) {
+                       void *site_addr = static_call_addr(site);
+
+                       if (static_call_is_init(site)) {
+                               /*
+                                * Don't write to call sites which were in
+                                * initmem and have since been freed.
+                                */
+                               if (!mod && system_state >= SYSTEM_RUNNING)
+                                       continue;
+                               if (mod && !within_module_init((unsigned long)site_addr, mod))
+                                       continue;
+                       }
+
+                       if (!kernel_text_address((unsigned long)site_addr)) {
+                               WARN_ONCE(1, "can't patch static call site at %pS",
+                                         site_addr);
+                               continue;
+                       }
+
+                       arch_static_call_transform(site_addr, NULL, func); /* patch the call site itself */
+               }
+       }
+
+done:
+       static_call_unlock();
+       cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(__static_call_update);
+
+static int __static_call_init(struct module *mod,
+                             struct static_call_site *start,
+                             struct static_call_site *stop)
+{
+       struct static_call_site *site;
+       struct static_call_key *key, *prev_key = NULL;
+       struct static_call_mod *site_mod;
+
+       if (start == stop) /* empty site table: nothing to register */
+               return 0;
+
+       static_call_sort_entries(start, stop); /* group the sites by key */
+
+       for (site = start; site < stop; site++) {
+               void *site_addr = static_call_addr(site);
+
+               if ((mod && within_module_init((unsigned long)site_addr, mod)) ||
+                   (!mod && init_section_contains(site_addr, 1)))
+                       static_call_set_init(site); /* flag sites located in init memory */
+
+               key = static_call_key(site);
+               if (key != prev_key) { /* first site for this key in the sorted table */
+                       prev_key = key;
+
+                       site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
+                       if (!site_mod)
+                               return -ENOMEM; /* records linked so far stay on their keys' lists */
+
+                       site_mod->mod = mod;
+                       site_mod->sites = site;
+                       site_mod->next = key->mods; /* push onto the head of the key's list */
+                       key->mods = site_mod;
+               }
+
+               arch_static_call_transform(site_addr, NULL, key->func); /* point the site at the key's current target */
+       }
+
+       return 0;
+}
+
+#ifdef CONFIG_MODULES
+
+/* Register and patch every static call site in @mod; returns 0 or -ENOMEM. */
+static int static_call_add_module(struct module *mod)
+{
+       struct static_call_site *first = mod->static_call_sites;
+       struct static_call_site *end = first + mod->num_static_call_sites;
+
+       return __static_call_init(mod, first, end);
+}
+
+/*
+ * Unlink and free the static_call_mod record that @mod contributed to each
+ * key referenced by its site table. Keys with no record for @mod are skipped.
+ */
+static void static_call_del_module(struct static_call_mod_placeholder_unused *unused);
+
+/*
+ * Module notifier callback: register a module's call sites when it is coming
+ * and drop its records when it is going (or when registration failed).
+ */
+static int static_call_module_notify(struct notifier_block *nb,
+                                    unsigned long val, void *data)
+{
+       struct module *module = data;
+       int err = 0;
+
+       cpus_read_lock();
+       static_call_lock();
+
+       if (val == MODULE_STATE_COMING) {
+               err = static_call_add_module(module);
+               if (err) {
+                       WARN(1, "Failed to allocate memory for static calls");
+                       /* Undo whatever was linked before the failure. */
+                       static_call_del_module(module);
+               }
+       } else if (val == MODULE_STATE_GOING) {
+               static_call_del_module(module);
+       }
+
+       static_call_unlock();
+       cpus_read_unlock();
+
+       return notifier_from_errno(err);
+}
+
+static struct notifier_block static_call_module_nb = {
+       .notifier_call = static_call_module_notify, /* registered from static_call_init() */
+};
+
+#endif /* CONFIG_MODULES */
+
+/*
+ * Boot-time setup: sort and register the built-in call sites, patch each one
+ * to its key's current target, then (with CONFIG_MODULES) start listening
+ * for module load/unload events.
+ *
+ * Returns 0. The function is registered with early_initcall(), so it must
+ * have the int (*)(void) initcall signature rather than return void.
+ * An allocation failure while registering the built-in sites is fatal.
+ */
+static int __init static_call_init(void)
+{
+       int ret;
+
+       /* Already set up: nothing to do. */
+       if (static_call_initialized)
+               return 0;
+
+       cpus_read_lock();
+       static_call_lock();
+       ret = __static_call_init(NULL, __start_static_call_sites,
+                                __stop_static_call_sites);
+       static_call_unlock();
+       cpus_read_unlock();
+
+       if (ret) {
+               pr_err("Failed to allocate memory for static_call!\n");
+               BUG();
+       }
+
+       static_call_initialized = true;
+
+#ifdef CONFIG_MODULES
+       register_module_notifier(&static_call_module_nb);
+#endif
+       return 0;
+}
+early_initcall(static_call_init);