kprobes: Cure hotplug lock ordering issues
author		Thomas Gleixner <tglx@linutronix.de>
		Wed, 24 May 2017 08:15:36 +0000 (10:15 +0200)
committer	Thomas Gleixner <tglx@linutronix.de>
		Fri, 26 May 2017 08:10:45 +0000 (10:10 +0200)

Converting the cpu hotplug locking to a percpu rwsem unearthed hidden lock
ordering problems.

There is a wide range of locks involved in this: kprobe_mutex,
jump_label_mutex, ftrace_lock, text_mutex, event_mutex, module_mutex,
func_hash->regex_lock and a gazillion of lock order permutations with
nested get_online_cpus() calls.

Some of those permutations are potential deadlocks even with the current
nesting hotplug locking scheme, but they can't be discovered by lockdep.

The conversion of the hotplug locking to a percpu rwsem does not allow nested
locking, so the hotplug rwsem has to be taken early in the call chain and a
proper lock order has to be established.

After quite some analysis and going down the wrong road several times, the
following lock order has been chosen:

kprobe_mutex -> cpus_rwsem -> jump_label_mutex -> text_mutex

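As a rough sketch (illustration only, not a verbatim function from the
patch), the resulting nesting with kprobe_mutex held by the kprobes core,
as in try_to_optimize_kprobe() and its callers below, looks like this:

	mutex_lock(&kprobe_mutex);
	cpus_read_lock();
	jump_label_lock();
	mutex_lock(&text_mutex);
	/* ... patch the kprobe site ... */
	mutex_unlock(&text_mutex);
	jump_label_unlock();
	cpus_read_unlock();
	mutex_unlock(&kprobe_mutex);
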
For kprobes which hook on an ftrace function trace point, it's required to
drop cpus_rwsem before calling into the ftrace code to avoid a deadlock on
the func_hash->regex_lock.

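For illustration, this is how arm_kprobe() ends up after this change (see
the hunk further down, the comment is added here): the ftrace shortcut is
taken before the hotplug lock is acquired at all.

	static void arm_kprobe(struct kprobe *kp)
	{
		if (unlikely(kprobe_ftrace(kp))) {
			/* ftrace takes ftrace_lock and func_hash->regex_lock
			 * itself, without cpus_rwsem held */
			arm_kprobe_ftrace(kp);
			return;
		}
		cpus_read_lock();
		mutex_lock(&text_mutex);
		__arm_kprobe(kp);
		mutex_unlock(&text_mutex);
		cpus_read_unlock();
	}
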
[ Steven: Ftrace interaction fixes ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Link: http://lkml.kernel.org/r/20170524081549.104864779@linutronix.de

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 2d2d3a5..9f60567 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -483,11 +483,6 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
  */
 static void do_optimize_kprobes(void)
 {
-       /* Optimization never be done when disarmed */
-       if (kprobes_all_disarmed || !kprobes_allow_optimization ||
-           list_empty(&optimizing_list))
-               return;
-
        /*
         * The optimization/unoptimization refers online_cpus via
         * stop_machine() and cpu-hotplug modifies online_cpus.
@@ -495,14 +490,19 @@ static void do_optimize_kprobes(void)
         * This combination can cause a deadlock (cpu-hotplug try to lock
         * text_mutex but stop_machine can not be done because online_cpus
         * has been changed)
-        * To avoid this deadlock, we need to call get_online_cpus()
+        * To avoid this deadlock, caller must have locked cpu hotplug
         * for preventing cpu-hotplug outside of text_mutex locking.
         */
-       get_online_cpus();
+       lockdep_assert_cpus_held();
+
+       /* Optimization never be done when disarmed */
+       if (kprobes_all_disarmed || !kprobes_allow_optimization ||
+           list_empty(&optimizing_list))
+               return;
+
        mutex_lock(&text_mutex);
        arch_optimize_kprobes(&optimizing_list);
        mutex_unlock(&text_mutex);
-       put_online_cpus();
 }
 
 /*
@@ -513,12 +513,13 @@ static void do_unoptimize_kprobes(void)
 {
        struct optimized_kprobe *op, *tmp;
 
+       /* See comment in do_optimize_kprobes() */
+       lockdep_assert_cpus_held();
+
        /* Unoptimization must be done anytime */
        if (list_empty(&unoptimizing_list))
                return;
 
-       /* Ditto to do_optimize_kprobes */
-       get_online_cpus();
        mutex_lock(&text_mutex);
        arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
        /* Loop free_list for disarming */
@@ -537,7 +538,6 @@ static void do_unoptimize_kprobes(void)
                        list_del_init(&op->list);
        }
        mutex_unlock(&text_mutex);
-       put_online_cpus();
 }
 
 /* Reclaim all kprobes on the free_list */
@@ -562,6 +562,7 @@ static void kick_kprobe_optimizer(void)
 static void kprobe_optimizer(struct work_struct *work)
 {
        mutex_lock(&kprobe_mutex);
+       cpus_read_lock();
        /* Lock modules while optimizing kprobes */
        mutex_lock(&module_mutex);
 
@@ -587,6 +588,7 @@ static void kprobe_optimizer(struct work_struct *work)
        do_free_cleaned_kprobes();
 
        mutex_unlock(&module_mutex);
+       cpus_read_unlock();
        mutex_unlock(&kprobe_mutex);
 
        /* Step 5: Kick optimizer again if needed */
@@ -650,9 +652,8 @@ static void optimize_kprobe(struct kprobe *p)
 /* Short cut to direct unoptimizing */
 static void force_unoptimize_kprobe(struct optimized_kprobe *op)
 {
-       get_online_cpus();
+       lockdep_assert_cpus_held();
        arch_unoptimize_kprobe(op);
-       put_online_cpus();
        if (kprobe_disabled(&op->kp))
                arch_disarm_kprobe(&op->kp);
 }
@@ -791,6 +792,7 @@ static void try_to_optimize_kprobe(struct kprobe *p)
                return;
 
        /* For preparing optimization, jump_label_text_reserved() is called */
+       cpus_read_lock();
        jump_label_lock();
        mutex_lock(&text_mutex);
 
@@ -812,6 +814,7 @@ static void try_to_optimize_kprobe(struct kprobe *p)
 out:
        mutex_unlock(&text_mutex);
        jump_label_unlock();
+       cpus_read_unlock();
 }
 
 #ifdef CONFIG_SYSCTL
@@ -826,6 +829,7 @@ static void optimize_all_kprobes(void)
        if (kprobes_allow_optimization)
                goto out;
 
+       cpus_read_lock();
        kprobes_allow_optimization = true;
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
@@ -833,6 +837,7 @@ static void optimize_all_kprobes(void)
                        if (!kprobe_disabled(p))
                                optimize_kprobe(p);
        }
+       cpus_read_unlock();
        printk(KERN_INFO "Kprobes globally optimized\n");
 out:
        mutex_unlock(&kprobe_mutex);
@@ -851,6 +856,7 @@ static void unoptimize_all_kprobes(void)
                return;
        }
 
+       cpus_read_lock();
        kprobes_allow_optimization = false;
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
@@ -859,6 +865,7 @@ static void unoptimize_all_kprobes(void)
                                unoptimize_kprobe(p, false);
                }
        }
+       cpus_read_unlock();
        mutex_unlock(&kprobe_mutex);
 
        /* Wait for unoptimizing completion */
@@ -1010,14 +1017,11 @@ static void arm_kprobe(struct kprobe *kp)
                arm_kprobe_ftrace(kp);
                return;
        }
-       /*
-        * Here, since __arm_kprobe() doesn't use stop_machine(),
-        * this doesn't cause deadlock on text_mutex. So, we don't
-        * need get_online_cpus().
-        */
+       cpus_read_lock();
        mutex_lock(&text_mutex);
        __arm_kprobe(kp);
        mutex_unlock(&text_mutex);
+       cpus_read_unlock();
 }
 
 /* Disarm a kprobe with text_mutex */
@@ -1027,10 +1031,12 @@ static void disarm_kprobe(struct kprobe *kp, bool reopt)
                disarm_kprobe_ftrace(kp);
                return;
        }
-       /* Ditto */
+
+       cpus_read_lock();
        mutex_lock(&text_mutex);
        __disarm_kprobe(kp, reopt);
        mutex_unlock(&text_mutex);
+       cpus_read_unlock();
 }
 
 /*
@@ -1298,13 +1304,10 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
        int ret = 0;
        struct kprobe *ap = orig_p;
 
+       cpus_read_lock();
+
        /* For preparing optimization, jump_label_text_reserved() is called */
        jump_label_lock();
-       /*
-        * Get online CPUs to avoid text_mutex deadlock.with stop machine,
-        * which is invoked by unoptimize_kprobe() in add_new_kprobe()
-        */
-       get_online_cpus();
        mutex_lock(&text_mutex);
 
        if (!kprobe_aggrprobe(orig_p)) {
@@ -1352,8 +1355,8 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
 
 out:
        mutex_unlock(&text_mutex);
-       put_online_cpus();
        jump_label_unlock();
+       cpus_read_unlock();
 
        if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
                ap->flags &= ~KPROBE_FLAG_DISABLED;
@@ -1555,9 +1558,12 @@ int register_kprobe(struct kprobe *p)
                goto out;
        }
 
-       mutex_lock(&text_mutex);        /* Avoiding text modification */
+       cpus_read_lock();
+       /* Prevent text modification */
+       mutex_lock(&text_mutex);
        ret = prepare_kprobe(p);
        mutex_unlock(&text_mutex);
+       cpus_read_unlock();
        if (ret)
                goto out;
 
@@ -1570,7 +1576,6 @@ int register_kprobe(struct kprobe *p)
 
        /* Try to optimize kprobe */
        try_to_optimize_kprobe(p);
-
 out:
        mutex_unlock(&kprobe_mutex);